You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/04/19 21:34:04 UTC
[16/51] [partial] incubator-joshua git commit: Converted KenLM into a
submodule
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/search.c
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/search.c b/ext/kenlm/jam-files/engine/search.c
deleted file mode 100644
index b2beada..0000000
--- a/ext/kenlm/jam-files/engine/search.c
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
- *
- * This file is part of Jam - see jam.c for Copyright information.
- */
-
-/* This file is ALSO:
- * Copyright 2001-2004 David Abrahams.
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying file LICENSE_1_0.txt or copy at
- * http://www.boost.org/LICENSE_1_0.txt)
- */
-
-#include "jam.h"
-#include "search.h"
-
-#include "compile.h"
-#include "filesys.h"
-#include "hash.h"
-#include "lists.h"
-#include "object.h"
-#include "pathsys.h"
-#include "strings.h"
-#include "timestamp.h"
-#include "variable.h"
-
-#include <string.h>
-
-
-typedef struct _binding
-{
- OBJECT * binding;
- OBJECT * target;
-} BINDING;
-
-static struct hash * explicit_bindings = 0;
-
-
-void call_bind_rule( OBJECT * target_, OBJECT * boundname_ )
-{
- LIST * const bind_rule = var_get( root_module(), constant_BINDRULE );
- if ( !list_empty( bind_rule ) )
- {
- OBJECT * target = object_copy( target_ );
- OBJECT * boundname = object_copy( boundname_ );
- if ( boundname && target )
- {
- /* Prepare the argument list. */
- FRAME frame[ 1 ];
- frame_init( frame );
-
- /* First argument is the target name. */
- lol_add( frame->args, list_new( target ) );
-
- lol_add( frame->args, list_new( boundname ) );
- if ( lol_get( frame->args, 1 ) )
- {
- OBJECT * rulename = list_front( bind_rule );
- list_free( evaluate_rule( bindrule( rulename, root_module() ), rulename, frame ) );
- }
-
- /* Clean up */
- frame_free( frame );
- }
- else
- {
- if ( boundname )
- object_free( boundname );
- if ( target )
- object_free( target );
- }
- }
-}
-
-/* Records the binding of a target with an explicit LOCATE. */
-void set_explicit_binding( OBJECT * target, OBJECT * locate )
-{
- OBJECT * boundname;
- OBJECT * key;
- PATHNAME f[ 1 ];
- string buf[ 1 ];
- int found;
- BINDING * ba;
-
- if ( !explicit_bindings )
- explicit_bindings = hashinit( sizeof( BINDING ), "explicitly specified "
- "locations" );
-
- string_new( buf );
-
- /* Parse the filename. */
- path_parse( object_str( target ), f );
-
- /* Ignore the grist. */
- f->f_grist.ptr = 0;
- f->f_grist.len = 0;
-
- /* Root the target path at the given location. */
- f->f_root.ptr = object_str( locate );
- f->f_root.len = strlen( object_str( locate ) );
-
- path_build( f, buf );
- boundname = object_new( buf->value );
- if ( DEBUG_SEARCH )
- printf( "explicit locate %s: %s\n", object_str( target ), buf->value );
- string_free( buf );
- key = path_as_key( boundname );
- object_free( boundname );
-
- ba = (BINDING *)hash_insert( explicit_bindings, key, &found );
- if ( !found )
- {
- ba->binding = key;
- ba->target = target;
- }
- else
- object_free( key );
-}
-
-/*
- * search.c - find a target along $(SEARCH) or $(LOCATE).
- *
- * First, check if LOCATE is set. If so, use it to determine the location of
- * target and return, regardless of whether anything exists at that location.
- *
- * Second, examine all directories in SEARCH. If the file exists there or there
- * is another target with the same name already placed at this location via the
- * LOCATE setting, stop and return the location. In case of a previous target,
- * return its name via the 'another_target' argument.
- *
- * This behaviour allows handling dependencies on generated files.
- *
- * If caller does not expect that the target is generated, 0 can be passed as
- * 'another_target'.
- */
-
-OBJECT * search( OBJECT * target, timestamp * const time,
- OBJECT * * another_target, int const file )
-{
- PATHNAME f[ 1 ];
- LIST * varlist;
- string buf[ 1 ];
- int found = 0;
- OBJECT * boundname = 0;
-
- if ( another_target )
- *another_target = 0;
-
- if ( !explicit_bindings )
- explicit_bindings = hashinit( sizeof( BINDING ), "explicitly specified "
- "locations" );
-
- string_new( buf );
-
- /* Parse the filename. */
- path_parse( object_str( target ), f );
-
- f->f_grist.ptr = 0;
- f->f_grist.len = 0;
-
- varlist = var_get( root_module(), constant_LOCATE );
- if ( !list_empty( varlist ) )
- {
- OBJECT * key;
- f->f_root.ptr = object_str( list_front( varlist ) );
- f->f_root.len = strlen( object_str( list_front( varlist ) ) );
-
- path_build( f, buf );
-
- if ( DEBUG_SEARCH )
- printf( "locate %s: %s\n", object_str( target ), buf->value );
-
- key = object_new( buf->value );
- timestamp_from_path( time, key );
- object_free( key );
- found = 1;
- }
- else if ( varlist = var_get( root_module(), constant_SEARCH ),
- !list_empty( varlist ) )
- {
- LISTITER iter = list_begin( varlist );
- LISTITER const end = list_end( varlist );
- for ( ; iter != end; iter = list_next( iter ) )
- {
- BINDING * ba;
- file_info_t * ff;
- OBJECT * key;
- OBJECT * test_path;
-
- f->f_root.ptr = object_str( list_item( iter ) );
- f->f_root.len = strlen( object_str( list_item( iter ) ) );
-
- string_truncate( buf, 0 );
- path_build( f, buf );
-
- if ( DEBUG_SEARCH )
- printf( "search %s: %s\n", object_str( target ), buf->value );
-
- test_path = object_new( buf->value );
- key = path_as_key( test_path );
- object_free( test_path );
- ff = file_query( key );
- timestamp_from_path( time, key );
-
- if ( ( ba = (BINDING *)hash_find( explicit_bindings, key ) ) )
- {
- if ( DEBUG_SEARCH )
- printf(" search %s: found explicitly located target %s\n",
- object_str( target ), object_str( ba->target ) );
- if ( another_target )
- *another_target = ba->target;
- found = 1;
- object_free( key );
- break;
- }
- else if ( ff )
- {
- if ( !file || ff->is_file )
- {
- found = 1;
- object_free( key );
- break;
- }
- }
- object_free( key );
- }
- }
-
- if ( !found )
- {
- /* Look for the obvious. */
- /* This is a questionable move. Should we look in the obvious place if
- * SEARCH is set?
- */
- OBJECT * key;
-
- f->f_root.ptr = 0;
- f->f_root.len = 0;
-
- string_truncate( buf, 0 );
- path_build( f, buf );
-
- if ( DEBUG_SEARCH )
- printf( "search %s: %s\n", object_str( target ), buf->value );
-
- key = object_new( buf->value );
- timestamp_from_path( time, key );
- object_free( key );
- }
-
- boundname = object_new( buf->value );
- string_free( buf );
-
- /* Prepare a call to BINDRULE if the variable is set. */
- call_bind_rule( target, boundname );
-
- return boundname;
-}
-
-
-static void free_binding( void * xbinding, void * data )
-{
- object_free( ( (BINDING *)xbinding )->binding );
-}
-
-
-void search_done( void )
-{
- if ( explicit_bindings )
- {
- hashenumerate( explicit_bindings, free_binding, 0 );
- hashdone( explicit_bindings );
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/search.h
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/search.h b/ext/kenlm/jam-files/engine/search.h
deleted file mode 100644
index 7e74f79..0000000
--- a/ext/kenlm/jam-files/engine/search.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright 1993, 1995 Christopher Seiwald.
- *
- * This file is part of Jam - see jam.c for Copyright information.
- */
-
-/*
- * search.h - find a target along $(SEARCH) or $(LOCATE)
- */
-
-#ifndef SEARCH_SW20111118_H
-#define SEARCH_SW20111118_H
-
-#include "object.h"
-#include "timestamp.h"
-
-void set_explicit_binding( OBJECT * target, OBJECT * locate );
-OBJECT * search( OBJECT * target, timestamp * const time,
- OBJECT * * another_target, int const file );
-void search_done( void );
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/strings.c
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/strings.c b/ext/kenlm/jam-files/engine/strings.c
deleted file mode 100644
index 3d3e19b..0000000
--- a/ext/kenlm/jam-files/engine/strings.c
+++ /dev/null
@@ -1,223 +0,0 @@
-/* Copyright David Abrahams 2004. Distributed under the Boost */
-/* Software License, Version 1.0. (See accompanying */
-/* file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) */
-
-#include "jam.h"
-#include "strings.h"
-
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-
-
-#ifndef NDEBUG
-# define JAM_STRING_MAGIC ((char)0xcf)
-# define JAM_STRING_MAGIC_SIZE 4
-static void assert_invariants( string * self )
-{
- int i;
-
- if ( self->value == 0 )
- {
- assert( self->size == 0 );
- assert( self->capacity == 0 );
- assert( self->opt[ 0 ] == 0 );
- return;
- }
-
- assert( self->size < self->capacity );
- assert( ( self->capacity <= sizeof( self->opt ) ) == ( self->value == self->opt ) );
- assert( self->value[ self->size ] == 0 );
- /* String objects modified manually after construction to contain embedded
- * '\0' characters are considered structurally valid.
- */
- assert( strlen( self->value ) <= self->size );
-
- for ( i = 0; i < 4; ++i )
- {
- assert( self->magic[ i ] == JAM_STRING_MAGIC );
- assert( self->value[ self->capacity + i ] == JAM_STRING_MAGIC );
- }
-}
-#else
-# define JAM_STRING_MAGIC_SIZE 0
-# define assert_invariants(x) do {} while (0)
-#endif
-
-
-void string_new( string * s )
-{
- s->value = s->opt;
- s->size = 0;
- s->capacity = sizeof( s->opt );
- s->opt[ 0 ] = 0;
-#ifndef NDEBUG
- memset( s->magic, JAM_STRING_MAGIC, sizeof( s->magic ) );
-#endif
- assert_invariants( s );
-}
-
-
-void string_free( string * s )
-{
- assert_invariants( s );
- if ( s->value != s->opt )
- BJAM_FREE( s->value );
- string_new( s );
-}
-
-
-static void string_reserve_internal( string * self, size_t capacity )
-{
- if ( self->value == self->opt )
- {
- self->value = (char *)BJAM_MALLOC_ATOMIC( capacity +
- JAM_STRING_MAGIC_SIZE );
- self->value[ 0 ] = 0;
- strncat( self->value, self->opt, sizeof(self->opt) );
- assert( strlen( self->value ) <= self->capacity && "Regression test" );
- }
- else
- {
- self->value = (char *)BJAM_REALLOC( self->value, capacity +
- JAM_STRING_MAGIC_SIZE );
- }
-#ifndef NDEBUG
- memcpy( self->value + capacity, self->magic, JAM_STRING_MAGIC_SIZE );
-#endif
- self->capacity = capacity;
-}
-
-
-void string_reserve( string * self, size_t capacity )
-{
- assert_invariants( self );
- if ( capacity <= self->capacity )
- return;
- string_reserve_internal( self, capacity );
- assert_invariants( self );
-}
-
-
-static void extend_full( string * self, char const * start, char const * finish )
-{
- size_t new_size = self->capacity + ( finish - start );
- size_t new_capacity = self->capacity;
- size_t old_size = self->capacity;
- while ( new_capacity < new_size + 1)
- new_capacity <<= 1;
- string_reserve_internal( self, new_capacity );
- memcpy( self->value + old_size, start, new_size - old_size );
- self->value[ new_size ] = 0;
- self->size = new_size;
-}
-
-static void maybe_reserve( string * self, size_t new_size )
-{
- size_t capacity = self->capacity;
- if ( capacity <= new_size )
- {
- size_t new_capacity = capacity;
- while ( new_capacity <= new_size )
- new_capacity <<= 1;
- string_reserve_internal( self, new_capacity );
- }
-}
-
-
-void string_append( string * self, char const * rhs )
-{
- size_t rhs_size = strlen( rhs );
- size_t new_size = self->size + rhs_size;
- assert_invariants( self );
-
- maybe_reserve( self, new_size );
-
- memcpy( self->value + self->size, rhs, rhs_size + 1 );
- self->size = new_size;
-
- assert_invariants( self );
-}
-
-
-void string_append_range( string * self, char const * start, char const * finish )
-{
- size_t rhs_size = finish - start;
- size_t new_size = self->size + rhs_size;
- assert_invariants( self );
-
- maybe_reserve( self, new_size );
-
- memcpy( self->value + self->size, start, rhs_size );
- self->size = new_size;
- self->value[ new_size ] = 0;
-
- assert_invariants( self );
-}
-
-
-void string_copy( string * s, char const * rhs )
-{
- string_new( s );
- string_append( s, rhs );
-}
-
-void string_truncate( string * self, size_t n )
-{
- assert_invariants( self );
- assert( n <= self->capacity );
- self->value[ self->size = n ] = 0;
- assert_invariants( self );
-}
-
-
-void string_pop_back( string * self )
-{
- string_truncate( self, self->size - 1 );
-}
-
-
-void string_push_back( string * self, char x )
-{
- string_append_range( self, &x, &x + 1 );
-}
-
-
-char string_back( string * self )
-{
- assert_invariants( self );
- return self->value[ self->size - 1 ];
-}
-
-
-#ifndef NDEBUG
-void string_unit_test()
-{
- {
- string s[ 1 ];
- int i;
- int const limit = sizeof( s->opt ) * 2 + 2;
- string_new( s );
- assert( s->value == s->opt );
- for ( i = 0; i < limit; ++i )
- {
- string_push_back( s, (char)( i + 1 ) );
- assert( s->size == i + 1 );
- }
- assert( s->size == limit );
- assert( s->value != s->opt );
- for ( i = 0; i < limit; ++i )
- assert( s->value[ i ] == (char)( i + 1 ) );
- string_free( s );
- }
-
- {
- char * const original = " \n\t\v Foo \r\n\v \tBar\n\n\r\r\t\n\v\t \t";
- string copy[ 1 ];
- string_copy( copy, original );
- assert( !strcmp( copy->value, original ) );
- assert( copy->size == strlen( original ) );
- string_free( copy );
- }
-}
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/strings.h
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/strings.h b/ext/kenlm/jam-files/engine/strings.h
deleted file mode 100644
index 749f287..0000000
--- a/ext/kenlm/jam-files/engine/strings.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2004. David Abrahams
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying file LICENSE_1_0.txt or copy at
- * http://www.boost.org/LICENSE_1_0.txt)
- */
-
-#ifndef STRINGS_DWA20011024_H
-#define STRINGS_DWA20011024_H
-
-#include <stddef.h>
-
-typedef struct string
-{
- char * value;
- unsigned long size;
- unsigned long capacity;
- char opt[ 32 ];
-#ifndef NDEBUG
- char magic[ 4 ];
-#endif
-} string;
-
-void string_new( string * );
-void string_copy( string *, char const * );
-void string_free( string * );
-void string_append( string *, char const * );
-void string_append_range( string *, char const *, char const * );
-void string_push_back( string * s, char x );
-void string_reserve( string *, size_t );
-void string_truncate( string *, size_t );
-void string_pop_back( string * );
-char string_back( string * );
-void string_unit_test();
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/subst.c
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/subst.c b/ext/kenlm/jam-files/engine/subst.c
deleted file mode 100644
index a5fcee0..0000000
--- a/ext/kenlm/jam-files/engine/subst.c
+++ /dev/null
@@ -1,116 +0,0 @@
-#include "jam.h"
-#include "subst.h"
-
-#include "builtins.h"
-#include "frames.h"
-#include "hash.h"
-#include "lists.h"
-
-#include <stddef.h>
-
-
-typedef struct regex_entry
-{
- OBJECT * pattern;
- regexp * regex;
-} regex_entry;
-
-static struct hash * regex_hash;
-
-
-regexp * regex_compile( OBJECT * pattern )
-{
- int found;
- regex_entry * e ;
-
- if ( !regex_hash )
- regex_hash = hashinit( sizeof( regex_entry ), "regex" );
-
- e = (regex_entry *)hash_insert( regex_hash, pattern, &found );
- if ( !found )
- {
- e->pattern = object_copy( pattern );
- e->regex = regcomp( (char *)pattern );
- }
-
- return e->regex;
-}
-
-
-LIST * builtin_subst( FRAME * frame, int flags )
-{
- LIST * result = L0;
- LIST * const arg1 = lol_get( frame->args, 0 );
- LISTITER iter = list_begin( arg1 );
- LISTITER const end = list_end( arg1 );
-
- if ( iter != end && list_next( iter ) != end && list_next( list_next( iter )
- ) != end )
- {
- char const * const source = object_str( list_item( iter ) );
- OBJECT * const pattern = list_item( list_next( iter ) );
- regexp * const repat = regex_compile( pattern );
-
- if ( regexec( repat, (char *)source) )
- {
- LISTITER subst = list_next( iter );
-
- while ( ( subst = list_next( subst ) ) != end )
- {
-#define BUFLEN 4096
- char buf[ BUFLEN + 1 ];
- char const * in = object_str( list_item( subst ) );
- char * out = buf;
-
- for ( ; *in && out < buf + BUFLEN; ++in )
- {
- if ( *in == '\\' || *in == '$' )
- {
- ++in;
- if ( *in == 0 )
- break;
- if ( *in >= '0' && *in <= '9' )
- {
- unsigned int const n = *in - '0';
- size_t const srclen = repat->endp[ n ] -
- repat->startp[ n ];
- size_t const remaining = buf + BUFLEN - out;
- size_t const len = srclen < remaining
- ? srclen
- : remaining;
- memcpy( out, repat->startp[ n ], len );
- out += len;
- continue;
- }
- /* fall through and copy the next character */
- }
- *out++ = *in;
- }
- *out = 0;
-
- result = list_push_back( result, object_new( buf ) );
-#undef BUFLEN
- }
- }
- }
-
- return result;
-}
-
-
-static void free_regex( void * xregex, void * data )
-{
- regex_entry * const regex = (regex_entry *)xregex;
- object_free( regex->pattern );
- BJAM_FREE( regex->regex );
-}
-
-
-void regex_done()
-{
- if ( regex_hash )
- {
- hashenumerate( regex_hash, free_regex, (void *)0 );
- hashdone( regex_hash );
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/subst.h
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/subst.h b/ext/kenlm/jam-files/engine/subst.h
deleted file mode 100644
index 7dc09a6..0000000
--- a/ext/kenlm/jam-files/engine/subst.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* Copyright 2001-2004 David Abrahams.
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
- */
-
-#ifndef SUBST_JG20120722_H
-#define SUBST_JG20120722_H
-
-#include "object.h"
-#include "regexp.h"
-
-regexp * regex_compile( OBJECT * pattern );
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/timestamp.c
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/timestamp.c b/ext/kenlm/jam-files/engine/timestamp.c
deleted file mode 100644
index 0d01698..0000000
--- a/ext/kenlm/jam-files/engine/timestamp.c
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
- *
- * This file is part of Jam - see jam.c for Copyright information.
- */
-
-/* This file is ALSO:
- * Copyright 2001-2004 David Abrahams.
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying file LICENSE_1_0.txt or
- * http://www.boost.org/LICENSE_1_0.txt)
- */
-
-/*
- * timestamp.c - get the timestamp of a file or archive member
- *
- * External routines:
- * timestamp_from_path() - return timestamp for a path, if present
- * timestamp_done() - free timestamp tables
- *
- * Internal routines:
- * time_enter() - internal worker callback for scanning archives &
- * directories
- * free_timestamps() - worker function for freeing timestamp table contents
- */
-
-#include "jam.h"
-#include "timestamp.h"
-
-#include "filesys.h"
-#include "hash.h"
-#include "object.h"
-#include "pathsys.h"
-#include "strings.h"
-
-
-/*
- * BINDING - all known files
- */
-
-typedef struct _binding
-{
- OBJECT * name;
- short flags;
-
-#define BIND_SCANNED 0x01 /* if directory or arch, has been scanned */
-
- short progress;
-
-#define BIND_INIT 0 /* never seen */
-#define BIND_NOENTRY 1 /* timestamp requested but file never found */
-#define BIND_SPOTTED 2 /* file found but not timed yet */
-#define BIND_MISSING 3 /* file found but can not get timestamp */
-#define BIND_FOUND 4 /* file found and time stamped */
-
- /* update time - cleared if the there is nothing to bind */
- timestamp time;
-} BINDING;
-
-static struct hash * bindhash = 0;
-
-static void time_enter( void *, OBJECT *, int const found,
- timestamp const * const );
-
-static char * time_progress[] =
-{
- "INIT",
- "NOENTRY",
- "SPOTTED",
- "MISSING",
- "FOUND"
-};
-
-
-#ifdef OS_NT
-/*
- * timestamp_from_filetime() - Windows FILETIME --> timestamp conversion
- *
- * Lifted shamelessly from the CPython implementation.
- */
-
-void timestamp_from_filetime( timestamp * const t, FILETIME const * const ft )
-{
- /* Seconds between 1.1.1601 and 1.1.1970 */
- static __int64 const secs_between_epochs = 11644473600;
-
- /* We can not simply cast and dereference a FILETIME, since it might not be
- * aligned properly. __int64 type variables are expected to be aligned to an
- * 8 byte boundary while FILETIME structures may be aligned to any 4 byte
- * boundary. Using an incorrectly aligned __int64 variable may cause a
- * performance penalty on some platforms or even exceptions on others
- * (documented on MSDN).
- */
- __int64 in;
- memcpy( &in, ft, sizeof( in ) );
-
- /* FILETIME resolution: 100ns. */
- timestamp_init( t, (time_t)( ( in / 10000000 ) - secs_between_epochs ),
- (int)( in % 10000000 ) * 100 );
-}
-#endif /* OS_NT */
-
-
-void timestamp_clear( timestamp * const time )
-{
- time->secs = time->nsecs = 0;
-}
-
-
-int timestamp_cmp( timestamp const * const lhs, timestamp const * const rhs )
-{
- return lhs->secs == rhs->secs
- ? lhs->nsecs - rhs->nsecs
- : lhs->secs - rhs->secs;
-}
-
-
-void timestamp_copy( timestamp * const target, timestamp const * const source )
-{
- target->secs = source->secs;
- target->nsecs = source->nsecs;
-}
-
-
-void timestamp_current( timestamp * const t )
-{
-#ifdef OS_NT
- /* GetSystemTimeAsFileTime()'s resolution seems to be about 15 ms on Windows
- * XP and under a millisecond on Windows 7.
- */
- FILETIME ft;
- GetSystemTimeAsFileTime( &ft );
- timestamp_from_filetime( t, &ft );
-#else /* OS_NT */
- timestamp_init( t, time( 0 ), 0 );
-#endif /* OS_NT */
-}
-
-
-int timestamp_empty( timestamp const * const time )
-{
- return !time->secs && !time->nsecs;
-}
-
-
-/*
- * timestamp_from_path() - return timestamp for a path, if present
- */
-
-void timestamp_from_path( timestamp * const time, OBJECT * const path )
-{
- PROFILE_ENTER( timestamp );
-
- PATHNAME f1;
- PATHNAME f2;
- int found;
- BINDING * b;
- string buf[ 1 ];
-
-
- if ( file_time( path, time ) < 0 )
- timestamp_clear( time );
-
- PROFILE_EXIT( timestamp );
-}
-
-
-void timestamp_init( timestamp * const time, time_t const secs, int const nsecs
- )
-{
- time->secs = secs;
- time->nsecs = nsecs;
-}
-
-
-void timestamp_max( timestamp * const max, timestamp const * const lhs,
- timestamp const * const rhs )
-{
- if ( timestamp_cmp( lhs, rhs ) > 0 )
- timestamp_copy( max, lhs );
- else
- timestamp_copy( max, rhs );
-}
-
-
-static char const * timestamp_formatstr( timestamp const * const time,
- char const * const format )
-{
- static char result1[ 500 ];
- static char result2[ 500 ];
- strftime( result1, sizeof( result1 ) / sizeof( *result1 ), format, gmtime(
- &time->secs ) );
- sprintf( result2, result1, time->nsecs );
- return result2;
-}
-
-
-char const * timestamp_str( timestamp const * const time )
-{
- return timestamp_formatstr( time, "%Y-%m-%d %H:%M:%S.%%09d +0000" );
-}
-
-
-char const * timestamp_timestr( timestamp const * const time )
-{
- return timestamp_formatstr( time, "%H:%M:%S.%%09d" );
-}
-
-
-/*
- * time_enter() - internal worker callback for scanning archives & directories
- */
-
-static void time_enter( void * closure, OBJECT * target, int const found,
- timestamp const * const time )
-{
- int item_found;
- BINDING * b;
- struct hash * const bindhash = (struct hash *)closure;
-
- target = path_as_key( target );
-
- b = (BINDING *)hash_insert( bindhash, target, &item_found );
- if ( !item_found )
- {
- b->name = object_copy( target );
- b->flags = 0;
- }
-
- timestamp_copy( &b->time, time );
- b->progress = found ? BIND_FOUND : BIND_SPOTTED;
-
- if ( DEBUG_BINDSCAN )
- printf( "time ( %s ) : %s\n", object_str( target ), time_progress[
- b->progress ] );
-
- object_free( target );
-}
-
-
-/*
- * free_timestamps() - worker function for freeing timestamp table contents
- */
-
-static void free_timestamps( void * xbinding, void * data )
-{
- object_free( ( (BINDING *)xbinding )->name );
-}
-
-
-/*
- * timestamp_done() - free timestamp tables
- */
-
-void timestamp_done()
-{
- if ( bindhash )
- {
- hashenumerate( bindhash, free_timestamps, 0 );
- hashdone( bindhash );
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/timestamp.h
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/timestamp.h b/ext/kenlm/jam-files/engine/timestamp.h
deleted file mode 100644
index aaf1310..0000000
--- a/ext/kenlm/jam-files/engine/timestamp.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright 1993, 1995 Christopher Seiwald.
- *
- * This file is part of Jam - see jam.c for Copyright information.
- */
-
-/*
- * timestamp.h - get the timestamp of a file or archive member
- */
-
-#ifndef TIMESTAMP_H_SW_2011_11_18
-#define TIMESTAMP_H_SW_2011_11_18
-
-#include "object.h"
-
-#ifdef OS_NT
-# define WIN32_LEAN_AND_MEAN
-# include <windows.h>
-#endif
-
-#include <time.h>
-
-typedef struct timestamp
-{
- time_t secs;
- int nsecs;
-} timestamp;
-
-void timestamp_clear( timestamp * const );
-int timestamp_cmp( timestamp const * const lhs, timestamp const * const rhs );
-void timestamp_copy( timestamp * const target, timestamp const * const source );
-void timestamp_current( timestamp * const );
-int timestamp_empty( timestamp const * const );
-void timestamp_from_path( timestamp * const, OBJECT * const path );
-void timestamp_init( timestamp * const, time_t const secs, int const nsecs );
-void timestamp_max( timestamp * const max, timestamp const * const lhs,
- timestamp const * const rhs );
-char const * timestamp_str( timestamp const * const );
-char const * timestamp_timestr( timestamp const * const );
-
-#ifdef OS_NT
-void timestamp_from_filetime( timestamp * const, FILETIME const * const );
-#endif
-
-void timestamp_done();
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/variable.c
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/variable.c b/ext/kenlm/jam-files/engine/variable.c
deleted file mode 100644
index 2c292fb..0000000
--- a/ext/kenlm/jam-files/engine/variable.c
+++ /dev/null
@@ -1,345 +0,0 @@
-/*
- * Copyright 1993, 2000 Christopher Seiwald.
- *
- * This file is part of Jam - see jam.c for Copyright information.
- */
-
-/* This file is ALSO:
- * Copyright 2001-2004 David Abrahams.
- * Copyright 2005 Reece H. Dunn.
- * Copyright 2005 Rene Rivera.
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying file LICENSE_1_0.txt or copy at
- * http://www.boost.org/LICENSE_1_0.txt)
- */
-
-/*
- * variable.c - handle Jam multi-element variables.
- *
- * External routines:
- *
- * var_defines() - load a bunch of variable=value settings
- * var_get() - get value of a user defined symbol
- * var_set() - set a variable in jam's user defined symbol table.
- * var_swap() - swap a variable's value with the given one
- * var_done() - free variable tables
- *
- * Internal routines:
- *
- * var_enter() - make new var symbol table entry, returning var ptr
- * var_dump() - dump a variable to stdout
- */
-
-#include "jam.h"
-#include "variable.h"
-
-#include "filesys.h"
-#include "hash.h"
-#include "modules.h"
-#include "parse.h"
-#include "pathsys.h"
-#include "strings.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-
-
-/*
- * VARIABLE - a user defined multi-value variable
- */
-
-typedef struct _variable VARIABLE ;
-
-struct _variable
-{
- OBJECT * symbol;
- LIST * value;
-};
-
-static LIST * * var_enter( struct module_t *, OBJECT * symbol );
-static void var_dump( OBJECT * symbol, LIST * value, char * what );
-
-
-/*
- * var_defines() - load a bunch of variable=value settings
- *
- * If preprocess is false, take the value verbatim.
- *
- * Otherwise, if the variable value is enclosed in quotes, strip the quotes.
- * Otherwise, if variable name ends in PATH, split value at :'s.
- * Otherwise, split the value at blanks.
- */
-
-void var_defines( struct module_t * module, char * const * e, int preprocess )
-{
- string buf[ 1 ];
-
- string_new( buf );
-
- for ( ; *e; ++e )
- {
- char * val;
-
- if ( ( val = strchr( *e, '=' ) )
-#if defined( OS_MAC )
- /* On the mac (MPW), the var=val is actually var\0val */
- /* Think different. */
- || ( val = *e + strlen( *e ) )
-#endif
- )
- {
- LIST * l = L0;
- size_t const len = strlen( val + 1 );
- int const quoted = ( val[ 1 ] == '"' ) && ( val[ len ] == '"' ) &&
- ( len > 1 );
-
- if ( quoted && preprocess )
- {
- string_append_range( buf, val + 2, val + len );
- l = list_push_back( l, object_new( buf->value ) );
- string_truncate( buf, 0 );
- }
- else
- {
- char * p;
- char * pp;
- char split =
-#if defined( OPT_NO_EXTERNAL_VARIABLE_SPLIT )
- '\0'
-#elif defined( OS_MAC )
- ','
-#else
- ' '
-#endif
- ;
-
- /* Split *PATH at :'s, not spaces. */
- if ( val - 4 >= *e )
- {
- if ( !strncmp( val - 4, "PATH", 4 ) ||
- !strncmp( val - 4, "Path", 4 ) ||
- !strncmp( val - 4, "path", 4 ) )
- split = SPLITPATH;
- }
-
- /* Do the split. */
- for
- (
- pp = val + 1;
- preprocess && ( ( p = strchr( pp, split ) ) != 0 );
- pp = p + 1
- )
- {
- string_append_range( buf, pp, p );
- l = list_push_back( l, object_new( buf->value ) );
- string_truncate( buf, 0 );
- }
-
- l = list_push_back( l, object_new( pp ) );
- }
-
- /* Get name. */
- string_append_range( buf, *e, val );
- {
- OBJECT * const varname = object_new( buf->value );
- var_set( module, varname, l, VAR_SET );
- object_free( varname );
- }
- string_truncate( buf, 0 );
- }
- }
- string_free( buf );
-}
-
-
-/* Last returned variable value saved so we may clear it in var_done(). */
-static LIST * saved_var = L0;
-
-
-/*
- * var_get() - get value of a user defined symbol
- *
- * Returns NULL if symbol unset.
- */
-
-LIST * var_get( struct module_t * module, OBJECT * symbol )
-{
- LIST * result = L0;
-#ifdef OPT_AT_FILES
- /* Some "fixed" variables... */
- if ( object_equal( symbol, constant_TMPDIR ) )
- {
- list_free( saved_var );
- result = saved_var = list_new( object_new( path_tmpdir()->value ) );
- }
- else if ( object_equal( symbol, constant_TMPNAME ) )
- {
- list_free( saved_var );
- result = saved_var = list_new( path_tmpnam() );
- }
- else if ( object_equal( symbol, constant_TMPFILE ) )
- {
- list_free( saved_var );
- result = saved_var = list_new( path_tmpfile() );
- }
- else if ( object_equal( symbol, constant_STDOUT ) )
- {
- list_free( saved_var );
- result = saved_var = list_new( object_copy( constant_STDOUT ) );
- }
- else if ( object_equal( symbol, constant_STDERR ) )
- {
- list_free( saved_var );
- result = saved_var = list_new( object_copy( constant_STDERR ) );
- }
- else
-#endif
- {
- VARIABLE * v;
- int n;
-
- if ( ( n = module_get_fixed_var( module, symbol ) ) != -1 )
- {
- if ( DEBUG_VARGET )
- var_dump( symbol, module->fixed_variables[ n ], "get" );
- result = module->fixed_variables[ n ];
- }
- else if ( module->variables && ( v = (VARIABLE *)hash_find(
- module->variables, symbol ) ) )
- {
- if ( DEBUG_VARGET )
- var_dump( v->symbol, v->value, "get" );
- result = v->value;
- }
- }
- return result;
-}
-
-
-LIST * var_get_and_clear_raw( module_t * module, OBJECT * symbol )
-{
- LIST * result = L0;
- VARIABLE * v;
-
- if ( module->variables && ( v = (VARIABLE *)hash_find( module->variables,
- symbol ) ) )
- {
- result = v->value;
- v->value = L0;
- }
-
- return result;
-}
-
-
-/*
- * var_set() - set a variable in Jam's user defined symbol table
- *
- * 'flag' controls the relationship between new and old values of the variable:
- * SET replaces the old with the new; APPEND appends the new to the old; DEFAULT
- * only uses the new if the variable was previously unset.
- *
- * Copies symbol. Takes ownership of value.
- */
-
-void var_set( struct module_t * module, OBJECT * symbol, LIST * value, int flag
- )
-{
- LIST * * v = var_enter( module, symbol );
-
- if ( DEBUG_VARSET )
- var_dump( symbol, value, "set" );
-
- switch ( flag )
- {
- case VAR_SET: /* Replace value */
- list_free( *v );
- *v = value;
- break;
-
- case VAR_APPEND: /* Append value */
- *v = list_append( *v, value );
- break;
-
- case VAR_DEFAULT: /* Set only if unset */
- if ( list_empty( *v ) )
- *v = value;
- else
- list_free( value );
- break;
- }
-}
-
-
-/*
- * var_swap() - swap a variable's value with the given one
- */
-
-LIST * var_swap( struct module_t * module, OBJECT * symbol, LIST * value )
-{
- LIST * * v = var_enter( module, symbol );
- LIST * oldvalue = *v;
- if ( DEBUG_VARSET )
- var_dump( symbol, value, "set" );
- *v = value;
- return oldvalue;
-}
-
-
-/*
- * var_enter() - make new var symbol table entry, returning var ptr
- */
-
-static LIST * * var_enter( struct module_t * module, OBJECT * symbol )
-{
- int found;
- VARIABLE * v;
- int n;
-
- if ( ( n = module_get_fixed_var( module, symbol ) ) != -1 )
- return &module->fixed_variables[ n ];
-
- if ( !module->variables )
- module->variables = hashinit( sizeof( VARIABLE ), "variables" );
-
- v = (VARIABLE *)hash_insert( module->variables, symbol, &found );
- if ( !found )
- {
- v->symbol = object_copy( symbol );
- v->value = L0;
- }
-
- return &v->value;
-}
-
-
-/*
- * var_dump() - dump a variable to stdout
- */
-
-static void var_dump( OBJECT * symbol, LIST * value, char * what )
-{
- printf( "%s %s = ", what, object_str( symbol ) );
- list_print( value );
- printf( "\n" );
-}
-
-
-/*
- * var_done() - free variable tables
- */
-
-static void delete_var_( void * xvar, void * data )
-{
- VARIABLE * const v = (VARIABLE *)xvar;
- object_free( v->symbol );
- list_free( v->value );
-}
-
-void var_done( struct module_t * module )
-{
- list_free( saved_var );
- saved_var = L0;
- hashenumerate( module->variables, delete_var_, 0 );
- hash_free( module->variables );
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/variable.h
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/variable.h b/ext/kenlm/jam-files/engine/variable.h
deleted file mode 100644
index ddb452b..0000000
--- a/ext/kenlm/jam-files/engine/variable.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 1993, 2000 Christopher Seiwald.
- *
- * This file is part of Jam - see jam.c for Copyright information.
- */
-
-/*
- * variable.h - handle jam multi-element variables
- */
-
-#ifndef VARIABLE_SW20111119_H
-#define VARIABLE_SW20111119_H
-
-#include "lists.h"
-#include "object.h"
-
-
-struct module_t;
-
-void var_defines( struct module_t *, char * const * e, int preprocess );
-LIST * var_get( struct module_t *, OBJECT * symbol );
-void var_set( struct module_t *, OBJECT * symbol, LIST * value, int flag );
-LIST * var_swap( struct module_t *, OBJECT * symbol, LIST * value );
-void var_done( struct module_t * );
-
-/*
- * Defines for var_set().
- */
-
-#define VAR_SET 0 /* override previous value */
-#define VAR_APPEND 1 /* append to previous value */
-#define VAR_DEFAULT 2 /* set only if no previous value */
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/w32_getreg.c
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/w32_getreg.c b/ext/kenlm/jam-files/engine/w32_getreg.c
deleted file mode 100644
index dd2d0fc..0000000
--- a/ext/kenlm/jam-files/engine/w32_getreg.c
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
-Copyright Paul Lin 2003. Copyright 2006 Bojan Resnik.
-Distributed under the Boost Software License, Version 1.0. (See accompanying
-file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
-*/
-
-# include "jam.h"
-
-# if defined( OS_NT ) || defined( OS_CYGWIN )
-
-# include "lists.h"
-# include "object.h"
-# include "parse.h"
-# include "frames.h"
-# include "strings.h"
-
-# define WIN32_LEAN_AND_MEAN
-# include <windows.h>
-
-# define MAX_REGISTRY_DATA_LENGTH 4096
-# define MAX_REGISTRY_KEYNAME_LENGTH 256
-# define MAX_REGISTRY_VALUENAME_LENGTH 16384
-
-typedef struct
-{
- LPCSTR name;
- HKEY value;
-} KeyMap;
-
-static const KeyMap dlRootKeys[] = {
- { "HKLM", HKEY_LOCAL_MACHINE },
- { "HKCU", HKEY_CURRENT_USER },
- { "HKCR", HKEY_CLASSES_ROOT },
- { "HKEY_LOCAL_MACHINE", HKEY_LOCAL_MACHINE },
- { "HKEY_CURRENT_USER", HKEY_CURRENT_USER },
- { "HKEY_CLASSES_ROOT", HKEY_CLASSES_ROOT },
- { 0, 0 }
-};
-
-static HKEY get_key(char const** path)
-{
- const KeyMap *p;
-
- for (p = dlRootKeys; p->name; ++p)
- {
- int n = strlen(p->name);
- if (!strncmp(*path,p->name,n))
- {
- if ((*path)[n] == '\\' || (*path)[n] == 0)
- {
- *path += n + 1;
- break;
- }
- }
- }
-
- return p->value;
-}
-
-LIST * builtin_system_registry( FRAME * frame, int flags )
-{
- char const* path = object_str( list_front( lol_get(frame->args, 0) ) );
- LIST* result = L0;
- HKEY key = get_key(&path);
-
- if (
- key != 0
- && ERROR_SUCCESS == RegOpenKeyEx(key, path, 0, KEY_QUERY_VALUE, &key)
- )
- {
- DWORD type;
- BYTE data[MAX_REGISTRY_DATA_LENGTH];
- DWORD len = sizeof(data);
- LIST * const field = lol_get(frame->args, 1);
-
- if ( ERROR_SUCCESS ==
- RegQueryValueEx(key, field ? object_str( list_front( field ) ) : 0, 0, &type, data, &len) )
- {
- switch (type)
- {
-
- case REG_EXPAND_SZ:
- {
- long len;
- string expanded[1];
- string_new(expanded);
-
- while (
- (len = ExpandEnvironmentStrings(
- (LPCSTR)data, expanded->value, expanded->capacity))
- > expanded->capacity
- )
- string_reserve(expanded, len);
-
- expanded->size = len - 1;
-
- result = list_push_back( result, object_new(expanded->value) );
- string_free( expanded );
- }
- break;
-
- case REG_MULTI_SZ:
- {
- char* s;
-
- for (s = (char*)data; *s; s += strlen(s) + 1)
- result = list_push_back( result, object_new(s) );
-
- }
- break;
-
- case REG_DWORD:
- {
- char buf[100];
- sprintf( buf, "%u", *(PDWORD)data );
- result = list_push_back( result, object_new(buf) );
- }
- break;
-
- case REG_SZ:
- result = list_push_back( result, object_new( (const char *)data ) );
- break;
- }
- }
- RegCloseKey(key);
- }
- return result;
-}
-
-static LIST* get_subkey_names(HKEY key, char const* path)
-{
- LIST* result = 0;
-
- if ( ERROR_SUCCESS ==
- RegOpenKeyEx(key, path, 0, KEY_ENUMERATE_SUB_KEYS, &key)
- )
- {
- char name[MAX_REGISTRY_KEYNAME_LENGTH];
- DWORD name_size = sizeof(name);
- DWORD index;
- FILETIME last_write_time;
-
- for ( index = 0;
- ERROR_SUCCESS == RegEnumKeyEx(
- key, index, name, &name_size, 0, 0, 0, &last_write_time);
- ++index,
- name_size = sizeof(name)
- )
- {
- name[name_size] = 0;
- result = list_append(result, list_new(object_new(name)));
- }
-
- RegCloseKey(key);
- }
-
- return result;
-}
-
-static LIST* get_value_names(HKEY key, char const* path)
-{
- LIST* result = 0;
-
- if ( ERROR_SUCCESS == RegOpenKeyEx(key, path, 0, KEY_QUERY_VALUE, &key) )
- {
- char name[MAX_REGISTRY_VALUENAME_LENGTH];
- DWORD name_size = sizeof(name);
- DWORD index;
-
- for ( index = 0;
- ERROR_SUCCESS == RegEnumValue(
- key, index, name, &name_size, 0, 0, 0, 0);
- ++index,
- name_size = sizeof(name)
- )
- {
- name[name_size] = 0;
- result = list_append(result, list_new(object_new(name)));
- }
-
- RegCloseKey(key);
- }
-
- return result;
-}
-
-LIST * builtin_system_registry_names( FRAME * frame, int flags )
-{
- char const* path = object_str( list_front( lol_get(frame->args, 0) ) );
- char const* result_type = object_str( list_front( lol_get(frame->args, 1) ) );
-
- HKEY key = get_key(&path);
-
- if ( !strcmp(result_type, "subkeys") )
- return get_subkey_names(key, path);
- if ( !strcmp(result_type, "values") )
- return get_value_names(key, path);
- return 0;
-}
-
-# endif
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/yyacc.c
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/yyacc.c b/ext/kenlm/jam-files/engine/yyacc.c
deleted file mode 100644
index b5efc96..0000000
--- a/ext/kenlm/jam-files/engine/yyacc.c
+++ /dev/null
@@ -1,268 +0,0 @@
-/* Copyright 2002 Rene Rivera.
-** Distributed under the Boost Software License, Version 1.0.
-** (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
-*/
-
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-#include <stdlib.h>
-
-/*
-# yyacc - yacc wrapper
-#
-# Allows tokens to be written as `literal` and then automatically
-# substituted with #defined tokens.
-#
-# Usage:
-# yyacc file.y filetab.h file.yy
-#
-# inputs:
-# file.yy yacc grammar with ` literals
-#
-# outputs:
-# file.y yacc grammar
-# filetab.h array of string <-> token mappings
-#
-# 3-13-93
-# Documented and p moved in sed command (for some reason,
-# s/x/y/p doesn't work).
-# 10-12-93
-# Take basename as second argument.
-# 12-31-96
-# reversed order of args to be compatible with GenFile rule
-# 11-20-2002
-# Reimplemented as a C program for portability. (Rene Rivera)
-*/
-
-void print_usage();
-char * copy_string(char * s, int l);
-char * tokenize_string(char * s);
-int cmp_literal(const void * a, const void * b);
-
-typedef struct
-{
- char * string;
- char * token;
-} literal;
-
-int main(int argc, char ** argv)
-{
- int result = 0;
- if (argc != 4)
- {
- print_usage();
- result = 1;
- }
- else
- {
- FILE * token_output_f = 0;
- FILE * grammar_output_f = 0;
- FILE * grammar_source_f = 0;
-
- grammar_source_f = fopen(argv[3],"r");
- if (grammar_source_f == 0) { result = 1; }
- if (result == 0)
- {
- literal literals[1024];
- int t = 0;
- char l[2048];
- while (1)
- {
- if (fgets(l,2048,grammar_source_f) != 0)
- {
- char * c = l;
- while (1)
- {
- char * c1 = strchr(c,'`');
- if (c1 != 0)
- {
- char * c2 = strchr(c1+1,'`');
- if (c2 != 0)
- {
- literals[t].string = copy_string(c1+1,c2-c1-1);
- literals[t].token = tokenize_string(literals[t].string);
- t += 1;
- c = c2+1;
- }
- else
- break;
- }
- else
- break;
- }
- }
- else
- {
- break;
- }
- }
- literals[t].string = 0;
- literals[t].token = 0;
- qsort(literals,t,sizeof(literal),cmp_literal);
- {
- int p = 1;
- int i = 1;
- while (literals[i].string != 0)
- {
- if (strcmp(literals[p-1].string,literals[i].string) != 0)
- {
- literals[p] = literals[i];
- p += 1;
- }
- i += 1;
- }
- literals[p].string = 0;
- literals[p].token = 0;
- t = p;
- }
- token_output_f = fopen(argv[2],"w");
- if (token_output_f != 0)
- {
- int i = 0;
- while (literals[i].string != 0)
- {
- fprintf(token_output_f," { \"%s\", %s },\n",literals[i].string,literals[i].token);
- i += 1;
- }
- fclose(token_output_f);
- }
- else
- result = 1;
- if (result == 0)
- {
- grammar_output_f = fopen(argv[1],"w");
- if (grammar_output_f != 0)
- {
- int i = 0;
- while (literals[i].string != 0)
- {
- fprintf(grammar_output_f,"%%token %s\n",literals[i].token);
- i += 1;
- }
- rewind(grammar_source_f);
- while (1)
- {
- if (fgets(l,2048,grammar_source_f) != 0)
- {
- char * c = l;
- while (1)
- {
- char * c1 = strchr(c,'`');
- if (c1 != 0)
- {
- char * c2 = strchr(c1+1,'`');
- if (c2 != 0)
- {
- literal key;
- literal * replacement = 0;
- key.string = copy_string(c1+1,c2-c1-1);
- key.token = 0;
- replacement = (literal*)bsearch(
- &key,literals,t,sizeof(literal),cmp_literal);
- *c1 = 0;
- fprintf(grammar_output_f,"%s%s",c,replacement->token);
- c = c2+1;
- }
- else
- {
- fprintf(grammar_output_f,"%s",c);
- break;
- }
- }
- else
- {
- fprintf(grammar_output_f,"%s",c);
- break;
- }
- }
- }
- else
- {
- break;
- }
- }
- fclose(grammar_output_f);
- }
- else
- result = 1;
- }
- }
- if (result != 0)
- {
- perror("yyacc");
- }
- }
- return result;
-}
-
-static char * usage[] = {
- "yyacc <grammar output.y> <token table output.h> <grammar source.yy>",
- 0 };
-
-void print_usage()
-{
- char ** u;
- for (u = usage; *u != 0; ++u)
- {
- fputs(*u,stderr); putc('\n',stderr);
- }
-}
-
-char * copy_string(char * s, int l)
-{
- char * result = (char*)malloc(l+1);
- strncpy(result,s,l);
- result[l] = 0;
- return result;
-}
-
-char * tokenize_string(char * s)
-{
- char * result;
- char * literal = s;
- int l;
- int c;
-
- if (strcmp(s,":") == 0) literal = "_colon";
- else if (strcmp(s,"!") == 0) literal = "_bang";
- else if (strcmp(s,"!=") == 0) literal = "_bang_equals";
- else if (strcmp(s,"&&") == 0) literal = "_amperamper";
- else if (strcmp(s,"&") == 0) literal = "_amper";
- else if (strcmp(s,"+") == 0) literal = "_plus";
- else if (strcmp(s,"+=") == 0) literal = "_plus_equals";
- else if (strcmp(s,"||") == 0) literal = "_barbar";
- else if (strcmp(s,"|") == 0) literal = "_bar";
- else if (strcmp(s,";") == 0) literal = "_semic";
- else if (strcmp(s,"-") == 0) literal = "_minus";
- else if (strcmp(s,"<") == 0) literal = "_langle";
- else if (strcmp(s,"<=") == 0) literal = "_langle_equals";
- else if (strcmp(s,">") == 0) literal = "_rangle";
- else if (strcmp(s,">=") == 0) literal = "_rangle_equals";
- else if (strcmp(s,".") == 0) literal = "_period";
- else if (strcmp(s,"?") == 0) literal = "_question";
- else if (strcmp(s,"?=") == 0) literal = "_question_equals";
- else if (strcmp(s,"=") == 0) literal = "_equals";
- else if (strcmp(s,",") == 0) literal = "_comma";
- else if (strcmp(s,"[") == 0) literal = "_lbracket";
- else if (strcmp(s,"]") == 0) literal = "_rbracket";
- else if (strcmp(s,"{") == 0) literal = "_lbrace";
- else if (strcmp(s,"}") == 0) literal = "_rbrace";
- else if (strcmp(s,"(") == 0) literal = "_lparen";
- else if (strcmp(s,")") == 0) literal = "_rparen";
- l = strlen(literal)+2;
- result = (char*)malloc(l+1);
- for (c = 0; literal[c] != 0; ++c)
- {
- result[c] = toupper(literal[c]);
- }
- result[l-2] = '_';
- result[l-1] = 't';
- result[l] = 0;
- return result;
-}
-
-int cmp_literal(const void * a, const void * b)
-{
- return strcmp(((const literal *)a)->string,((const literal *)b)->string);
-}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/fail/Jamroot
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/fail/Jamroot b/ext/kenlm/jam-files/fail/Jamroot
deleted file mode 100644
index c3584d8..0000000
--- a/ext/kenlm/jam-files/fail/Jamroot
+++ /dev/null
@@ -1,4 +0,0 @@
-actions fail {
- false
-}
-make fail : : fail ;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/sanity.jam
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/sanity.jam b/ext/kenlm/jam-files/sanity.jam
deleted file mode 100644
index 1851ece..0000000
--- a/ext/kenlm/jam-files/sanity.jam
+++ /dev/null
@@ -1,344 +0,0 @@
-import modules ;
-import option ;
-import os ;
-import path ;
-import project ;
-import build-system ;
-import version ;
-
-#Shell with trailing line removed http://lists.boost.org/boost-build/2007/08/17051.php
-rule trim-nl ( str extras * ) {
-return [ MATCH "([^
-]*)" : $(str) ] $(extras) ;
-}
-rule _shell ( cmd : extras * ) {
- return [ trim-nl [ SHELL $(cmd) : $(extras) ] ] ;
-}
-
-rule shell_or_fail ( cmd ) {
- local ret = [ SHELL $(cmd) : exit-status ] ;
- if $(ret[2]) != 0 {
- exit $(cmd) failed : 1 ;
- }
-}
-
-rule shell_or_die ( cmd ) {
- local ret = [ SHELL $(cmd) : exit-status ] ;
- if $(ret[2]) != 0 {
- exit $(cmd) failed : 1 ;
- }
- return [ trim-nl $(ret[1]) ] ;
-}
-
-cxxflags = [ os.environ "CXXFLAGS" ] ;
-cflags = [ os.environ "CFLAGS" ] ;
-ldflags = [ os.environ "LDFLAGS" ] ;
-
-#Run g++ with empty main and these arguments to see if it passes.
-rule test_flags ( flags * : main ? ) {
- flags = $(cxxflags) $(ldflags) $(flags) ;
- if ! $(main) {
- main = "int main() {}" ;
- }
- local cmd = "bash -c \"g++ "$(flags:J=" ")" -x c++ - <<<'$(main)' -o $(TOP)/dummy >/dev/null 2>/dev/null && rm $(TOP)/dummy 2>/dev/null\"" ;
- local ret = [ SHELL $(cmd) : exit-status ] ;
- if --debug-configuration in [ modules.peek : ARGV ] {
- echo $(cmd) ;
- echo $(ret) ;
- }
- if $(ret[2]) = 0 {
- return true ;
- } else {
- return ;
- }
-}
-
-rule test_header ( name ) {
- return [ test_flags "-include $(name)" ] ;
-}
-
-requirements = ;
-
-FORCE-STATIC = [ option.get "static" : : "yes" ] ;
-if $(FORCE-STATIC) {
- requirements += <link>static <runtime-link>static ;
-}
-
-rule test_library ( name ) {
- if $(FORCE-STATIC) {
- return [ test_flags "-Wl,-Bstatic -l$(name) -Wl,-Bdynamic" ] ;
- } else {
- return [ test_flags "-l$(name)" ] ;
- }
-}
-
-{
- local cleaning = [ option.get "clean" : : yes ] ;
- cleaning ?= [ option.get "clean-all" : no : yes ] ;
- if "clean" in [ modules.peek : ARGV ] {
- cleaning = yes ;
- }
- constant CLEANING : $(cleaning) ;
-}
-
-shared-command-line = ;
-local argv = [ modules.peek : ARGV ] ;
-while $(argv) {
- if $(argv[1]) = "link=shared" {
- shared-command-line = <link>shared ;
- }
- argv = $(argv[2-]) ;
-}
-
-#Determine if a library can be compiled statically.
-rule auto-shared ( name : additional * ) {
-
- additional ?= "" ;
- if $(shared-command-line) = "<link>shared" {
- return "<link>shared" ;
- } else {
- if [ test_flags $(additional)" -Wl,-Bstatic -l"$(name)" -Wl,-Bdynamic" ] {
- return ;
- } else {
- if $(FORCE-STATIC) {
- echo "Could not statically link against lib $(name). Your build will probably fail." ;
- return ;
- } else {
- return "<link>shared" ;
- }
- }
- }
-}
-
-# MacPorts' default location is /opt/local -- use this if no path is given.
-with-macports = [ option.get "with-macports" : : "/opt/local" ] ;
-if $(with-macports) {
- using darwin ;
- ECHO "Using --with-macports=$(with-macports), implying use of darwin GCC" ;
-
- L-boost-search = -L$(with-macports)/lib ;
- boost-search = <search>$(with-macports)/lib ;
- I-boost-include = -I$(with-macports)/include ;
- boost-include = <include>$(with-macports)/include ;
- requirements += $(boost-include) ;
-} else {
- with-boost = [ option.get "with-boost" ] ;
- with-boost ?= [ os.environ "BOOST_ROOT" ] ;
- if $(with-boost) {
- L-boost-search = -L$(with-boost)/lib" "-L$(with-boost)/lib64 ;
- boost-search = <search>$(with-boost)/lib <search>$(with-boost)/lib64 ;
- I-boost-include = -I$(with-boost)/include ;
- boost-include = <include>$(with-boost)/include ;
- requirements += $(boost-include) ;
- } else {
- L-boost-search = "" ;
- boost-search = ;
- I-boost-include = "" ;
- boost-include = ;
- }
-}
-
-#Convenience rule for boost libraries. Defines library boost_$(name).
-rule boost-lib ( name macro : deps * ) {
- lib boost_$(name)_static : $(deps) : $(boost-search) <name>boost_$(name)$(boost-lib-version) <link>static ;
- lib boost_$(name)_shared : $(deps) : $(boost-search) <name>boost_$(name)$(boost-lib-version) <link>shared : : <define>BOOST_$(macro) ;
-
- alias boost_$(name)_default : $(deps) : <link>static:<source>boost_$(name)_static <link>shared:<source>boost_$(name)_shared ;
-
- alias boost_$(name)_static_works : $(deps) : [ check-target-builds empty_test_shared "Shared Boost" : <source>boost_$(name)_default : <source>boost_$(name)_static ] ;
- alias boost_$(name) : $(deps) : [ check-target-builds empty_test_static "Static Boost" : <source>boost_$(name)_static_works : <source>boost_$(name)_shared ] ;
-}
-
-#Argument is e.g. 103600
-rule boost ( min-version ) {
- local cmd = "bash -c \"g++ "$(I-boost-include)" -dM -x c++ -E /dev/null -include boost/version.hpp 2>/dev/null |grep '#define BOOST_'\"" ;
- local boost-shell = [ SHELL "$(cmd)" : exit-status ] ;
- if $(boost-shell[2]) != 0 && $(CLEANING) = no {
- echo Failed to run "$(cmd)" ;
- exit Boost does not seem to be installed or g++ is confused. : 1 ;
- }
- constant BOOST-VERSION : [ MATCH "#define BOOST_VERSION ([0-9]*)" : $(boost-shell[1]) ] ;
- if $(BOOST-VERSION) < $(min-version) && $(CLEANING) = no {
- exit You have Boost $(BOOST-VERSION). This package requires Boost at least $(min-version) (and preferably newer). : 1 ;
- }
- # If matching version tags exist, use them.
- boost-lib-version = [ MATCH "#define BOOST_LIB_VERSION \"([^\"]*)\"" : $(boost-shell[1]) ] ;
- if [ test_flags $(L-boost-search)" -lboost_program_options-"$(boost-lib-version) ] {
- boost-lib-version = "-"$(boost-lib-version) ;
- } else {
- boost-lib-version = "" ;
- }
-
- #Crazy amount of testing to make sure that BOOST_TEST_DYN_LINK is defined properly.
- lib boost_unit_test_framework_static_test : : $(boost-search) <name>boost_unit_test_framework$(boost-lib-version) <link>static ;
- obj empty_test_static.o : jam-files/empty_test_main.cc boost_unit_test_framework_static_test : $(boost-include) ;
- exe empty_test_static : empty_test_static.o boost_unit_test_framework_static_test ;
-
- lib boost_unit_test_framework_shared_test : : $(boost-search) <name>boost_unit_test_framework$(boost-lib-version) <link>shared : : <define>BOOST_TEST_DYN_LINK ;
- obj empty_test_shared.o : jam-files/empty_test_main.cc boost_unit_test_framework_shared_test : $(boost-include) ;
- exe empty_test_shared : empty_test_shared.o boost_unit_test_framework_shared_test ;
-
- explicit empty_test_static.o empty_test_static empty_test_shared.o empty_test_shared ;
-
-
- #See tools/build/v2/contrib/boost.jam in a boost distribution for a table of macros to define.
- boost-lib system SYSTEM_DYN_LINK ;
- boost-lib thread THREAD_DYN_DLL : boost_system ;
- boost-lib program_options PROGRAM_OPTIONS_DYN_LINK ;
- boost-lib iostreams IOSTREAMS_DYN_LINK ;
- boost-lib filesystem FILE_SYSTEM_DYN_LINK ;
- boost-lib unit_test_framework TEST_DYN_LINK ;
-# if $(BOOST-VERSION) >= 104800 {
-# boost-lib chrono CHRONO_DYN_LINK ;
-# boost-lib timer TIMER_DYN_LINK : boost_chrono ;
-# }
-}
-
-#Link normally to a library, but sometimes static isn't installed so fall back to dynamic.
-rule external-lib ( name : search-path * : deps * ) {
- lib $(name) : : [ auto-shared $(name) : "-L"$(search-path) ] <search>$(search-path) <use>$(deps) ;
-}
-
-#Write the current command line to previous.sh. This does not do shell escaping.
-{
- local build-log = $(TOP)/previous.sh ;
- if ! [ path.exists $(build-log) ] {
- SHELL "touch \"$(build-log)\" && chmod +x \"$(build-log)\"" ;
- }
- local script = [ modules.peek : ARGV ] ;
- if $(script[1]) = "./jam-files/bjam" {
- #The ./bjam shell script calls ./jam-files/bjam so that appears in argv but
- #we want ./bjam to appear so the environment variables are set correctly.
- script = "./bjam "$(script[2-]:J=" ") ;
- } else {
- script = $(script:J=" ") ;
- }
- script = "#!/bin/sh\n$(script)\n" ;
- local ignored = @($(build-log):E=$(script)) ;
-}
-
-#Boost jam's static clang for Linux is buggy.
-requirements += <cxxflags>$(cxxflags) <cflags>$(cflags) <linkflags>$(ldflags) <os>LINUX,<toolset>clang:<link>shared ;
-
-if ! [ option.get "without-libsegfault" : : "yes" ] && ! $(FORCE-STATIC) {
- #libSegFault prints a stack trace on segfault. Link against it if available.
- if [ test_flags "-lSegFault" ] {
- external-lib SegFault ;
- requirements += <library>SegFault ;
- }
-}
-
-if [ option.get "git" : : "yes" ] {
- local revision = [ _shell "git rev-parse --verify HEAD |head -c 7" ] ;
- constant GITTAG : "/"$(revision) ;
-} else {
- constant GITTAG : "" ;
-}
-
-local prefix = [ option.get "prefix" ] ;
-if $(prefix) {
- prefix = [ path.root $(prefix) [ path.pwd ] ] ;
- prefix = $(prefix)$(GITTAG) ;
-} else {
- prefix = $(TOP)$(GITTAG) ;
-}
-
-path-constant PREFIX : $(prefix) ;
-
-path-constant BINDIR : [ option.get "bindir" : $(PREFIX)/bin ] ;
-path-constant LIBDIR : [ option.get "libdir" : $(PREFIX)/lib ] ;
-rule install-bin-libs ( deps * ) {
- install prefix-bin : $(deps) : <location>$(BINDIR) <install-dependencies>on <install-type>EXE <link>shared:<dll-path>$(LIBDIR) ;
- install prefix-lib : $(deps) : <location>$(LIBDIR) <install-dependencies>on <install-type>LIB <link>shared:<dll-path>$(LIBDIR) ;
-}
-rule install-headers ( name : list * : source-root ? ) {
- local includedir = [ option.get "includedir" : $(prefix)/include ] ;
- source-root ?= "." ;
- install $(name) : $(list) : <location>$(includedir) <install-source-root>$(source-root) ;
-}
-
-rule build-projects ( projects * ) {
- for local p in $(projects) {
- build-project $(p) ;
- }
-}
-
-#Only one post build hook is allowed. Allow multiple.
-post-hooks = ;
-rule post-build ( ok ? ) {
- for local r in $(post-hooks) {
- $(r) $(ok) ;
- }
-}
-IMPORT $(__name__) : post-build : : $(__name__).post-build ;
-build-system.set-post-build-hook $(__name__).post-build ;
-rule add-post-hook ( names * ) {
- post-hooks += $(names) ;
-}
-
-rule failure-message ( ok ? ) {
- if $(ok) != "ok" {
- local args = [ modules.peek : ARGV ] ;
- local args = $(args:J=" ") ;
- if --debug-configuration in [ modules.peek : ARGV ] {
- echo "The build failed with command line: " ;
- echo " $(args)" ;
- echo "If you need support, attach the full output to your e-mail." ;
- } else {
- echo "The build failed. If you need support, run:" ;
- echo " $(args) --debug-configuration -d2 |gzip >build.log.gz" ;
- echo "then attach build.log.gz to your e-mail." ;
- }
- echo "ERROR" ;
- } else {
- echo "SUCCESS" ;
- }
-}
-add-post-hook failure-message ;
-
-import feature : feature ;
-feature options-to-write : : free ;
-import toolset : flags ;
-flags write-options OPTIONS-TO-WRITE <options-to-write> ;
-actions write-options {
- echo "$(OPTIONS-TO-WRITE)" > $(<) ;
-}
-
-#Compare contents of file with current. If they're different, write to the
-#file. This file can then be used with <dependency>$(file) to force
-#recompilation.
-rule update-if-changed ( file current ) {
- if ( ! [ path.exists $(file) ] ) || ( [ _shell "cat $(file)" ] != $(current) ) {
- make $(file) : : $(__name__).write-options : <options-to-write>$(current) ;
- always $(file) ;
- }
-}
-
-if [ option.get "sanity-test" : : "yes" ] {
- local current_version = [ modules.peek : JAM_VERSION ] ;
- if ( $(current_version[0]) < 2000 && [ version.check-jam-version 3 1 16 ] ) || [ version.check-jam-version 2011 0 0 ] {
- EXIT "Sane" : 0 ;
- } else {
- EXIT "Bad" : 1 ;
- }
-}
-
-#Hack to act like alias in the sense that no lib is built, but only build cpp files once.
-import type ;
-rule fakelib ( name : deps * : requirements * : default-build * : usage-requirements * ) {
- local c-files = ;
- local real-deps = ;
- for local c in $(deps) {
- if [ type.type $(c) ] = CPP {
- c-files += $(c) ;
- } else {
- real-deps += $(c) ;
- }
- }
- for local c in $(c-files) {
- obj $(c:B).o : $(c) $(real-deps) : $(requirements) : $(default-build) : $(usage_requirements) ;
- }
- alias $(name) : $(c-files:B).o $(real-deps) : $(requirements) : $(default-build) : $(usage-requirements) ;
-}
-
-use-project /top : . ;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/lm/CMakeLists.txt b/ext/kenlm/lm/CMakeLists.txt
deleted file mode 100644
index e3ef06f..0000000
--- a/ext/kenlm/lm/CMakeLists.txt
+++ /dev/null
@@ -1,90 +0,0 @@
-cmake_minimum_required(VERSION 2.8.8)
-#
-# The KenLM cmake files make use of add_library(... OBJECTS ...)
-#
-# This syntax allows grouping of source files when compiling
-# (effectively creating "fake" libraries based on source subdirs).
-#
-# This syntax was only added in cmake version 2.8.8
-#
-# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
-
-
-# This CMake file was created by Lane Schwartz <do...@gmail.com>
-
-
-set(KENLM_MAX_ORDER 6 CACHE STRING "Maximum supported ngram order")
-
-add_definitions(-DKENLM_MAX_ORDER=${KENLM_MAX_ORDER})
-
-
-# Explicitly list the source files for this subdirectory
-#
-# If you add any source files to this subdirectory
-# that should be included in the kenlm library,
-# (this excludes any unit test files)
-# you should add them to the following list:
-set(KENLM_SOURCE
- bhiksha.cc
- binary_format.cc
- config.cc
- lm_exception.cc
- model.cc
- quantize.cc
- read_arpa.cc
- search_hashed.cc
- search_trie.cc
- sizes.cc
- trie.cc
- trie_sort.cc
- value_build.cc
- virtual_interface.cc
- vocab.cc
-)
-
-
-# Group these objects together for later use.
-#
-# Given add_library(foo OBJECT ${my_foo_sources}),
-# refer to these objects as $<TARGET_OBJECTS:foo>
-#
-add_library(kenlm OBJECT ${KENLM_SOURCE})
-
-# This directory has children that need to be processed
-add_subdirectory(builder)
-add_subdirectory(common)
-add_subdirectory(filter)
-
-
-
-# Explicitly list the executable files to be compiled
-set(EXE_LIST
- query
- fragment
- build_binary
-)
-
-AddExes(EXES ${EXE_LIST}
- DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>
- LIBRARIES ${Boost_LIBRARIES} pthread)
-
-# Conditionally build the interpolation code
-if(BUILD_INTERPOLATE)
- add_subdirectory(interpolate)
-endif()
-
-if(BUILD_TESTING)
-
- set(KENLM_BOOST_TESTS_LIST left_test partial_test)
- AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
- DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>
- LIBRARIES ${Boost_LIBRARIES} pthread
- TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa)
-
- # model_test requires an extra command line parameter
- KenLMAddTest(TEST model_test
- DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>
- LIBRARIES ${Boost_LIBRARIES} pthread
- TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
- ${CMAKE_CURRENT_SOURCE_DIR}/test_nounk.arpa)
-endif()
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/Jamfile
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/lm/Jamfile b/ext/kenlm/lm/Jamfile
deleted file mode 100644
index a479e2d..0000000
--- a/ext/kenlm/lm/Jamfile
+++ /dev/null
@@ -1,40 +0,0 @@
-# If you need higher order, change this option
-# Having this limit means that State can be
-# (KENLM_MAX_ORDER - 1) * sizeof(float) bytes instead of
-# sizeof(float*) + (KENLM_MAX_ORDER - 1) * sizeof(float) + malloc overhead
-max-order = [ option.get "max-kenlm-order" : 6 : 6 ] ;
-if ( $(max-order) != 6 ) {
- echo "Setting KenLM maximum n-gram order to $(max-order)" ;
-}
-max-order = <define>KENLM_MAX_ORDER=$(max-order) ;
-
-path-constant ORDER-LOG : bin/order.log ;
-update-if-changed $(ORDER-LOG) $(max-order) ;
-
-max-order += <dependency>$(ORDER-LOG) ;
-
-wrappers = ;
-local with-nplm = [ option.get "with-nplm" ] ;
-if $(with-nplm) {
- lib nplm : : <search>$(with-nplm)/src ;
- obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <cxxflags>-fopenmp <include>$(with-nplm)/3rdparty/eigen <define>NPLM_DOUBLE_PRECISION=0 ;
- alias nplm-all : nplm.o nplm ..//boost_thread : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>WITH_NPLM <library>..//boost_thread ;
- wrappers += nplm-all ;
-}
-
-fakelib kenlm : $(wrappers) [ glob *.cc : *main.cc *test.cc ] ../util//kenutil : <include>.. $(max-order) : : <include>.. $(max-order) ;
-
-import testing ;
-
-run left_test.cc kenlm /top//boost_unit_test_framework : : test.arpa ;
-run model_test.cc kenlm /top//boost_unit_test_framework : : test.arpa test_nounk.arpa ;
-run partial_test.cc kenlm /top//boost_unit_test_framework : : test.arpa ;
-
-exes = ;
-for local p in [ glob *_main.cc ] {
- local name = [ MATCH "(.*)\_main.cc" : $(p) ] ;
- exe $(name) : $(p) kenlm ;
- exes += $(name) ;
-}
-
-alias programs : $(exes) filter//filter builder//dump_counts : <threading>multi:<source>builder//lmplz ;
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/bhiksha.cc
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/lm/bhiksha.cc b/ext/kenlm/lm/bhiksha.cc
deleted file mode 100644
index 4262b61..0000000
--- a/ext/kenlm/lm/bhiksha.cc
+++ /dev/null
@@ -1,94 +0,0 @@
-#include "lm/bhiksha.hh"
-
-#include "lm/binary_format.hh"
-#include "lm/config.hh"
-#include "util/file.hh"
-#include "util/exception.hh"
-
-#include <limits>
-
-namespace lm {
-namespace ngram {
-namespace trie {
-
-DontBhiksha::DontBhiksha(const void * /*base*/, uint64_t /*max_offset*/, uint64_t max_next, const Config &/*config*/) :
- next_(util::BitsMask::ByMax(max_next)) {}
-
-const uint8_t kArrayBhikshaVersion = 0;
-
-// TODO: put this in binary file header instead when I change the binary file format again.
-void ArrayBhiksha::UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config) {
- uint8_t buffer[2];
- file.ReadForConfig(buffer, 2, offset);
- uint8_t version = buffer[0];
- uint8_t configured_bits = buffer[1];
- if (version != kArrayBhikshaVersion) UTIL_THROW(FormatLoadException, "This file has sorted array compression version " << (unsigned) version << " but the code expects version " << (unsigned)kArrayBhikshaVersion);
- config.pointer_bhiksha_bits = configured_bits;
-}
-
-namespace {
-
-// Find argmin_{chopped \in [0, RequiredBits(max_next)]} ChoppedDelta(max_offset)
-uint8_t ChopBits(uint64_t max_offset, uint64_t max_next, const Config &config) {
- uint8_t required = util::RequiredBits(max_next);
- uint8_t best_chop = 0;
- int64_t lowest_change = std::numeric_limits<int64_t>::max();
- // There are probably faster ways but I don't care because this is only done once per order at construction time.
- for (uint8_t chop = 0; chop <= std::min(required, config.pointer_bhiksha_bits); ++chop) {
- int64_t change = (max_next >> (required - chop)) * 64 /* table cost in bits */
- - max_offset * static_cast<int64_t>(chop); /* savings in bits*/
- if (change < lowest_change) {
- lowest_change = change;
- best_chop = chop;
- }
- }
- return best_chop;
-}
-
-std::size_t ArrayCount(uint64_t max_offset, uint64_t max_next, const Config &config) {
- uint8_t required = util::RequiredBits(max_next);
- uint8_t chopping = ChopBits(max_offset, max_next, config);
- return (max_next >> (required - chopping)) + 1 /* we store 0 too */;
-}
-} // namespace
-
-uint64_t ArrayBhiksha::Size(uint64_t max_offset, uint64_t max_next, const Config &config) {
- return sizeof(uint64_t) * (1 /* header */ + ArrayCount(max_offset, max_next, config)) + 7 /* 8-byte alignment */;
-}
-
-uint8_t ArrayBhiksha::InlineBits(uint64_t max_offset, uint64_t max_next, const Config &config) {
- return util::RequiredBits(max_next) - ChopBits(max_offset, max_next, config);
-}
-
-namespace {
-
-void *AlignTo8(void *from) {
- uint8_t *val = reinterpret_cast<uint8_t*>(from);
- std::size_t remainder = reinterpret_cast<std::size_t>(val) & 7;
- if (!remainder) return val;
- return val + 8 - remainder;
-}
-
-} // namespace
-
-ArrayBhiksha::ArrayBhiksha(void *base, uint64_t max_offset, uint64_t max_next, const Config &config)
- : next_inline_(util::BitsMask::ByBits(InlineBits(max_offset, max_next, config))),
- offset_begin_(reinterpret_cast<const uint64_t*>(AlignTo8(base)) + 1 /* 8-byte header */),
- offset_end_(offset_begin_ + ArrayCount(max_offset, max_next, config)),
- write_to_(reinterpret_cast<uint64_t*>(AlignTo8(base)) + 1 /* 8-byte header */ + 1 /* first entry is 0 */),
- original_base_(base) {}
-
-void ArrayBhiksha::FinishedLoading(const Config &config) {
- // *offset_begin_ = 0 but without a const_cast.
- *(write_to_ - (write_to_ - offset_begin_)) = 0;
-
- if (write_to_ != offset_end_) UTIL_THROW(util::Exception, "Did not get all the array entries that were expected.");
-
- uint8_t *head_write = reinterpret_cast<uint8_t*>(original_base_);
- *(head_write++) = kArrayBhikshaVersion;
- *(head_write++) = config.pointer_bhiksha_bits;
-}
-
-} // namespace trie
-} // namespace ngram
-} // namespace lm
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/bhiksha.hh
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/lm/bhiksha.hh b/ext/kenlm/lm/bhiksha.hh
deleted file mode 100644
index 36438f1..0000000
--- a/ext/kenlm/lm/bhiksha.hh
+++ /dev/null
@@ -1,122 +0,0 @@
-/* Simple implementation of
- * @inproceedings{bhikshacompression,
- * author={Bhiksha Raj and Ed Whittaker},
- * year={2003},
- * title={Lossless Compression of Language Model Structure and Word Identifiers},
- * booktitle={Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing},
- * pages={388--391},
- * }
- *
- * Currently only used for next pointers.
- */
-
-#ifndef LM_BHIKSHA_H
-#define LM_BHIKSHA_H
-
-#include "lm/model_type.hh"
-#include "lm/trie.hh"
-#include "util/bit_packing.hh"
-#include "util/sorted_uniform.hh"
-
-#include <algorithm>
-#include <stdint.h>
-#include <cassert>
-
-namespace lm {
-namespace ngram {
-struct Config;
-class BinaryFormat;
-
-namespace trie {
-
-class DontBhiksha {
- public:
- static const ModelType kModelTypeAdd = static_cast<ModelType>(0);
-
- static void UpdateConfigFromBinary(const BinaryFormat &, uint64_t, Config &/*config*/) {}
-
- static uint64_t Size(uint64_t /*max_offset*/, uint64_t /*max_next*/, const Config &/*config*/) { return 0; }
-
- static uint8_t InlineBits(uint64_t /*max_offset*/, uint64_t max_next, const Config &/*config*/) {
- return util::RequiredBits(max_next);
- }
-
- DontBhiksha(const void *base, uint64_t max_offset, uint64_t max_next, const Config &config);
-
- void ReadNext(const void *base, uint64_t bit_offset, uint64_t /*index*/, uint8_t total_bits, NodeRange &out) const {
- out.begin = util::ReadInt57(base, bit_offset, next_.bits, next_.mask);
- out.end = util::ReadInt57(base, bit_offset + total_bits, next_.bits, next_.mask);
- //assert(out.end >= out.begin);
- }
-
- void WriteNext(void *base, uint64_t bit_offset, uint64_t /*index*/, uint64_t value) {
- util::WriteInt57(base, bit_offset, next_.bits, value);
- }
-
- void FinishedLoading(const Config &/*config*/) {}
-
- uint8_t InlineBits() const { return next_.bits; }
-
- private:
- util::BitsMask next_;
-};
-
-class ArrayBhiksha {
- public:
- static const ModelType kModelTypeAdd = kArrayAdd;
-
- static void UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config);
-
- static uint64_t Size(uint64_t max_offset, uint64_t max_next, const Config &config);
-
- static uint8_t InlineBits(uint64_t max_offset, uint64_t max_next, const Config &config);
-
- ArrayBhiksha(void *base, uint64_t max_offset, uint64_t max_value, const Config &config);
-
- void ReadNext(const void *base, uint64_t bit_offset, uint64_t index, uint8_t total_bits, NodeRange &out) const {
- // Some assertions are commented out because they are expensive.
- // assert(*offset_begin_ == 0);
- // std::upper_bound returns the first element that is greater. Want the
- // last element that is <= to the index.
- const uint64_t *begin_it = std::upper_bound(offset_begin_, offset_end_, index) - 1;
- // Since *offset_begin_ == 0, the position should be in range.
- // assert(begin_it >= offset_begin_);
- const uint64_t *end_it;
- for (end_it = begin_it + 1; (end_it < offset_end_) && (*end_it <= index + 1); ++end_it) {}
- // assert(end_it == std::upper_bound(offset_begin_, offset_end_, index + 1));
- --end_it;
- // assert(end_it >= begin_it);
- out.begin = ((begin_it - offset_begin_) << next_inline_.bits) |
- util::ReadInt57(base, bit_offset, next_inline_.bits, next_inline_.mask);
- out.end = ((end_it - offset_begin_) << next_inline_.bits) |
- util::ReadInt57(base, bit_offset + total_bits, next_inline_.bits, next_inline_.mask);
- // If this fails, consider rebuilding your model using KenLM after 1e333d786b748555e8f368d2bbba29a016c98052
- assert(out.end >= out.begin);
- }
-
- void WriteNext(void *base, uint64_t bit_offset, uint64_t index, uint64_t value) {
- uint64_t encode = value >> next_inline_.bits;
- for (; write_to_ <= offset_begin_ + encode; ++write_to_) *write_to_ = index;
- util::WriteInt57(base, bit_offset, next_inline_.bits, value & next_inline_.mask);
- }
-
- void FinishedLoading(const Config &config);
-
- uint8_t InlineBits() const { return next_inline_.bits; }
-
- private:
- const util::BitsMask next_inline_;
-
- const uint64_t *const offset_begin_;
- const uint64_t *const offset_end_;
-
- uint64_t *write_to_;
-
- void *original_base_;
-};
-
-} // namespace trie
-} // namespace ngram
-} // namespace lm
-
-#endif // LM_BHIKSHA_H