You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/04/19 21:34:04 UTC

[16/51] [partial] incubator-joshua git commit: Converted KenLM into a submodule

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/search.c
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/search.c b/ext/kenlm/jam-files/engine/search.c
deleted file mode 100644
index b2beada..0000000
--- a/ext/kenlm/jam-files/engine/search.c
+++ /dev/null
@@ -1,274 +0,0 @@
-/*
- * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
- *
- * This file is part of Jam - see jam.c for Copyright information.
- */
-
-/* This file is ALSO:
- * Copyright 2001-2004 David Abrahams.
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying file LICENSE_1_0.txt or copy at
- * http://www.boost.org/LICENSE_1_0.txt)
- */
-
-#include "jam.h"
-#include "search.h"
-
-#include "compile.h"
-#include "filesys.h"
-#include "hash.h"
-#include "lists.h"
-#include "object.h"
-#include "pathsys.h"
-#include "strings.h"
-#include "timestamp.h"
-#include "variable.h"
-
-#include <string.h>
-
-
-typedef struct _binding
-{
-    OBJECT * binding;
-    OBJECT * target;
-} BINDING;
-
-static struct hash * explicit_bindings = 0;
-
-
-void call_bind_rule( OBJECT * target_, OBJECT * boundname_ )
-{
-    LIST * const bind_rule = var_get( root_module(), constant_BINDRULE );
-    if ( !list_empty( bind_rule ) )
-    {
-        OBJECT * target = object_copy( target_ );
-        OBJECT * boundname = object_copy( boundname_ );
-        if ( boundname && target )
-        {
-            /* Prepare the argument list. */
-            FRAME frame[ 1 ];
-            frame_init( frame );
-
-            /* First argument is the target name. */
-            lol_add( frame->args, list_new( target ) );
-
-            lol_add( frame->args, list_new( boundname ) );
-            if ( lol_get( frame->args, 1 ) )
-            {
-                OBJECT * rulename = list_front( bind_rule );
-                list_free( evaluate_rule( bindrule( rulename, root_module() ), rulename, frame ) );
-            }
-
-            /* Clean up */
-            frame_free( frame );
-        }
-        else
-        {
-            if ( boundname )
-                object_free( boundname );
-            if ( target )
-                object_free( target );
-        }
-    }
-}
-
-/* Records the binding of a target with an explicit LOCATE. */
-void set_explicit_binding( OBJECT * target, OBJECT * locate )
-{
-    OBJECT * boundname;
-    OBJECT * key;
-    PATHNAME f[ 1 ];
-    string buf[ 1 ];
-    int found;
-    BINDING * ba;
-
-    if ( !explicit_bindings )
-        explicit_bindings = hashinit( sizeof( BINDING ), "explicitly specified "
-            "locations" );
-
-    string_new( buf );
-
-    /* Parse the filename. */
-    path_parse( object_str( target ), f );
-
-    /* Ignore the grist. */
-    f->f_grist.ptr = 0;
-    f->f_grist.len = 0;
-
-    /* Root the target path at the given location. */
-    f->f_root.ptr = object_str( locate );
-    f->f_root.len = strlen( object_str( locate ) );
-
-    path_build( f, buf );
-    boundname = object_new( buf->value );
-    if ( DEBUG_SEARCH )
-        printf( "explicit locate %s: %s\n", object_str( target ), buf->value );
-    string_free( buf );
-    key = path_as_key( boundname );
-    object_free( boundname );
-
-    ba = (BINDING *)hash_insert( explicit_bindings, key, &found );
-    if ( !found )
-    {
-        ba->binding = key;
-        ba->target = target;
-    }
-    else
-        object_free( key );
-}
-
-/*
- * search.c - find a target along $(SEARCH) or $(LOCATE).
- *
- * First, check if LOCATE is set. If so, use it to determine the location of
- * target and return, regardless of whether anything exists at that location.
- *
- * Second, examine all directories in SEARCH. If the file exists there or there
- * is another target with the same name already placed at this location via the
- * LOCATE setting, stop and return the location. In case of a previous target,
- * return its name via the 'another_target' argument.
- *
- * This behaviour allows handling dependencies on generated files.
- *
- * If caller does not expect that the target is generated, 0 can be passed as
- * 'another_target'.
- */
-
-OBJECT * search( OBJECT * target, timestamp * const time,
-    OBJECT * * another_target, int const file )
-{
-    PATHNAME f[ 1 ];
-    LIST * varlist;
-    string buf[ 1 ];
-    int found = 0;
-    OBJECT * boundname = 0;
-
-    if ( another_target )
-        *another_target = 0;
-
-    if ( !explicit_bindings )
-        explicit_bindings = hashinit( sizeof( BINDING ), "explicitly specified "
-            "locations" );
-
-    string_new( buf );
-
-    /* Parse the filename. */
-    path_parse( object_str( target ), f );
-
-    f->f_grist.ptr = 0;
-    f->f_grist.len = 0;
-
-    varlist = var_get( root_module(), constant_LOCATE );
-    if ( !list_empty( varlist ) )
-    {
-        OBJECT * key;
-        f->f_root.ptr = object_str( list_front( varlist ) );
-        f->f_root.len = strlen( object_str( list_front( varlist ) ) );
-
-        path_build( f, buf );
-
-        if ( DEBUG_SEARCH )
-            printf( "locate %s: %s\n", object_str( target ), buf->value );
-
-        key = object_new( buf->value );
-        timestamp_from_path( time, key );
-        object_free( key );
-        found = 1;
-    }
-    else if ( varlist = var_get( root_module(), constant_SEARCH ),
-        !list_empty( varlist ) )
-    {
-        LISTITER iter = list_begin( varlist );
-        LISTITER const end = list_end( varlist );
-        for ( ; iter != end; iter = list_next( iter ) )
-        {
-            BINDING * ba;
-            file_info_t * ff;
-            OBJECT * key;
-            OBJECT * test_path;
-
-            f->f_root.ptr = object_str( list_item( iter ) );
-            f->f_root.len = strlen( object_str( list_item( iter ) ) );
-
-            string_truncate( buf, 0 );
-            path_build( f, buf );
-
-            if ( DEBUG_SEARCH )
-                printf( "search %s: %s\n", object_str( target ), buf->value );
-
-            test_path = object_new( buf->value );
-            key = path_as_key( test_path );
-            object_free( test_path );
-            ff = file_query( key );
-            timestamp_from_path( time, key );
-
-            if ( ( ba = (BINDING *)hash_find( explicit_bindings, key ) ) )
-            {
-                if ( DEBUG_SEARCH )
-                    printf(" search %s: found explicitly located target %s\n",
-                        object_str( target ), object_str( ba->target ) );
-                if ( another_target )
-                    *another_target = ba->target;
-                found = 1;
-                object_free( key );
-                break;
-            }
-            else if ( ff )
-            {
-                if ( !file || ff->is_file )
-                {
-                    found = 1;
-                    object_free( key );
-                    break;
-                }
-            }
-            object_free( key );
-        }
-    }
-
-    if ( !found )
-    {
-        /* Look for the obvious. */
-        /* This is a questionable move. Should we look in the obvious place if
-         * SEARCH is set?
-         */
-        OBJECT * key;
-
-        f->f_root.ptr = 0;
-        f->f_root.len = 0;
-
-        string_truncate( buf, 0 );
-        path_build( f, buf );
-
-        if ( DEBUG_SEARCH )
-            printf( "search %s: %s\n", object_str( target ), buf->value );
-
-        key = object_new( buf->value );
-        timestamp_from_path( time, key );
-        object_free( key );
-    }
-
-    boundname = object_new( buf->value );
-    string_free( buf );
-
-    /* Prepare a call to BINDRULE if the variable is set. */
-    call_bind_rule( target, boundname );
-
-    return boundname;
-}
-
-
-static void free_binding( void * xbinding, void * data )
-{
-    object_free( ( (BINDING *)xbinding )->binding );
-}
-
-
-void search_done( void )
-{
-    if ( explicit_bindings )
-    {
-        hashenumerate( explicit_bindings, free_binding, 0 );
-        hashdone( explicit_bindings );
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/search.h
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/search.h b/ext/kenlm/jam-files/engine/search.h
deleted file mode 100644
index 7e74f79..0000000
--- a/ext/kenlm/jam-files/engine/search.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Copyright 1993, 1995 Christopher Seiwald.
- *
- * This file is part of Jam - see jam.c for Copyright information.
- */
-
-/*
- * search.h - find a target along $(SEARCH) or $(LOCATE)
- */
-
-#ifndef SEARCH_SW20111118_H
-#define SEARCH_SW20111118_H
-
-#include "object.h"
-#include "timestamp.h"
-
-void set_explicit_binding( OBJECT * target, OBJECT * locate );
-OBJECT * search( OBJECT * target, timestamp * const time,
-    OBJECT * * another_target, int const file );
-void search_done( void );
-
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/strings.c
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/strings.c b/ext/kenlm/jam-files/engine/strings.c
deleted file mode 100644
index 3d3e19b..0000000
--- a/ext/kenlm/jam-files/engine/strings.c
+++ /dev/null
@@ -1,223 +0,0 @@
-/* Copyright David Abrahams 2004. Distributed under the Boost */
-/* Software License, Version 1.0. (See accompanying */
-/* file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) */
-
-#include "jam.h"
-#include "strings.h"
-
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-
-
-#ifndef NDEBUG
-# define JAM_STRING_MAGIC ((char)0xcf)
-# define JAM_STRING_MAGIC_SIZE 4
-static void assert_invariants( string * self )
-{
-    int i;
-
-    if ( self->value == 0 )
-    {
-        assert( self->size == 0 );
-        assert( self->capacity == 0 );
-        assert( self->opt[ 0 ] == 0 );
-        return;
-    }
-
-    assert( self->size < self->capacity );
-    assert( ( self->capacity <= sizeof( self->opt ) ) == ( self->value == self->opt ) );
-    assert( self->value[ self->size ] == 0 );
-    /* String objects modified manually after construction to contain embedded
-     * '\0' characters are considered structurally valid.
-     */
-    assert( strlen( self->value ) <= self->size );
-
-    for ( i = 0; i < 4; ++i )
-    {
-        assert( self->magic[ i ] == JAM_STRING_MAGIC );
-        assert( self->value[ self->capacity + i ] == JAM_STRING_MAGIC );
-    }
-}
-#else
-# define JAM_STRING_MAGIC_SIZE 0
-# define assert_invariants(x) do {} while (0)
-#endif
-
-
-void string_new( string * s )
-{
-    s->value = s->opt;
-    s->size = 0;
-    s->capacity = sizeof( s->opt );
-    s->opt[ 0 ] = 0;
-#ifndef NDEBUG
-    memset( s->magic, JAM_STRING_MAGIC, sizeof( s->magic ) );
-#endif
-    assert_invariants( s );
-}
-
-
-void string_free( string * s )
-{
-    assert_invariants( s );
-    if ( s->value != s->opt )
-        BJAM_FREE( s->value );
-    string_new( s );
-}
-
-
-static void string_reserve_internal( string * self, size_t capacity )
-{
-    if ( self->value == self->opt )
-    {
-        self->value = (char *)BJAM_MALLOC_ATOMIC( capacity +
-            JAM_STRING_MAGIC_SIZE );
-        self->value[ 0 ] = 0;
-        strncat( self->value, self->opt, sizeof(self->opt) );
-        assert( strlen( self->value ) <= self->capacity && "Regression test" );
-    }
-    else
-    {
-        self->value = (char *)BJAM_REALLOC( self->value, capacity +
-            JAM_STRING_MAGIC_SIZE );
-    }
-#ifndef NDEBUG
-    memcpy( self->value + capacity, self->magic, JAM_STRING_MAGIC_SIZE );
-#endif
-    self->capacity = capacity;
-}
-
-
-void string_reserve( string * self, size_t capacity )
-{
-    assert_invariants( self );
-    if ( capacity <= self->capacity )
-        return;
-    string_reserve_internal( self, capacity );
-    assert_invariants( self );
-}
-
-
-static void extend_full( string * self, char const * start, char const * finish )
-{
-    size_t new_size = self->capacity + ( finish - start );
-    size_t new_capacity = self->capacity;
-    size_t old_size = self->capacity;
-    while ( new_capacity < new_size + 1)
-        new_capacity <<= 1;
-    string_reserve_internal( self, new_capacity );
-    memcpy( self->value + old_size, start, new_size - old_size );
-    self->value[ new_size ] = 0;
-    self->size = new_size;
-}
-
-static void maybe_reserve( string * self, size_t new_size )
-{
-    size_t capacity = self->capacity;
-    if ( capacity <= new_size )
-    {
-        size_t new_capacity = capacity;
-        while ( new_capacity <= new_size )
-            new_capacity <<= 1;
-        string_reserve_internal( self, new_capacity );
-    }
-}
-
-
-void string_append( string * self, char const * rhs )
-{
-    size_t rhs_size = strlen( rhs );
-    size_t new_size = self->size + rhs_size;
-    assert_invariants( self );
-
-    maybe_reserve( self, new_size );
-
-    memcpy( self->value + self->size, rhs, rhs_size + 1 );
-    self->size = new_size;
-
-    assert_invariants( self );
-}
-
-
-void string_append_range( string * self, char const * start, char const * finish )
-{
-    size_t rhs_size = finish - start;
-    size_t new_size = self->size + rhs_size;
-    assert_invariants( self );
-
-    maybe_reserve( self, new_size );
-
-    memcpy( self->value + self->size, start, rhs_size );
-    self->size = new_size;
-    self->value[ new_size ] = 0;
-
-    assert_invariants( self );
-}
-
-
-void string_copy( string * s, char const * rhs )
-{
-    string_new( s );
-    string_append( s, rhs );
-}
-
-void string_truncate( string * self, size_t n )
-{
-    assert_invariants( self );
-    assert( n <= self->capacity );
-    self->value[ self->size = n ] = 0;
-    assert_invariants( self );
-}
-
-
-void string_pop_back( string * self )
-{
-    string_truncate( self, self->size - 1 );
-}
-
-
-void string_push_back( string * self, char x )
-{
-    string_append_range( self, &x, &x + 1 );
-}
-
-
-char string_back( string * self )
-{
-    assert_invariants( self );
-    return self->value[ self->size - 1 ];
-}
-
-
-#ifndef NDEBUG
-void string_unit_test()
-{
-    {
-        string s[ 1 ];
-        int i;
-        int const limit = sizeof( s->opt ) * 2 + 2;
-        string_new( s );
-        assert( s->value == s->opt );
-        for ( i = 0; i < limit; ++i )
-        {
-            string_push_back( s, (char)( i + 1 ) );
-            assert( s->size == i + 1 );
-        }
-        assert( s->size == limit );
-        assert( s->value != s->opt );
-        for ( i = 0; i < limit; ++i )
-            assert( s->value[ i ] == (char)( i + 1 ) );
-        string_free( s );
-    }
-
-    {
-        char * const original = "  \n\t\v  Foo \r\n\v \tBar\n\n\r\r\t\n\v\t \t";
-        string copy[ 1 ];
-        string_copy( copy, original );
-        assert( !strcmp( copy->value, original ) );
-        assert( copy->size == strlen( original ) );
-        string_free( copy );
-    }
-}
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/strings.h
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/strings.h b/ext/kenlm/jam-files/engine/strings.h
deleted file mode 100644
index 749f287..0000000
--- a/ext/kenlm/jam-files/engine/strings.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright 2004. David Abrahams
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying file LICENSE_1_0.txt or copy at
- * http://www.boost.org/LICENSE_1_0.txt)
- */
-
-#ifndef STRINGS_DWA20011024_H
-#define STRINGS_DWA20011024_H
-
-#include <stddef.h>
-
-typedef struct string
-{
-    char * value;
-    unsigned long size;
-    unsigned long capacity;
-    char opt[ 32 ];
-#ifndef NDEBUG
-    char magic[ 4 ];
-#endif
-} string;
-
-void string_new( string * );
-void string_copy( string *, char const * );
-void string_free( string * );
-void string_append( string *, char const * );
-void string_append_range( string *, char const *, char const * );
-void string_push_back( string * s, char x );
-void string_reserve( string *, size_t );
-void string_truncate( string *, size_t );
-void string_pop_back( string * );
-char string_back( string * );
-void string_unit_test();
-
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/subst.c
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/subst.c b/ext/kenlm/jam-files/engine/subst.c
deleted file mode 100644
index a5fcee0..0000000
--- a/ext/kenlm/jam-files/engine/subst.c
+++ /dev/null
@@ -1,116 +0,0 @@
-#include "jam.h"
-#include "subst.h"
-
-#include "builtins.h"
-#include "frames.h"
-#include "hash.h"
-#include "lists.h"
-
-#include <stddef.h>
-
-
-typedef struct regex_entry
-{
-    OBJECT * pattern;
-    regexp * regex;
-} regex_entry;
-
-static struct hash * regex_hash;
-
-
-regexp * regex_compile( OBJECT * pattern )
-{
-    int found;
-    regex_entry * e ;
-
-    if ( !regex_hash )
-        regex_hash = hashinit( sizeof( regex_entry ), "regex" );
-
-    e = (regex_entry *)hash_insert( regex_hash, pattern, &found );
-    if ( !found )
-    {
-        e->pattern = object_copy( pattern );
-        e->regex = regcomp( (char *)pattern );
-    }
-
-    return e->regex;
-}
-
-
-LIST * builtin_subst( FRAME * frame, int flags )
-{
-    LIST * result = L0;
-    LIST * const arg1 = lol_get( frame->args, 0 );
-    LISTITER iter = list_begin( arg1 );
-    LISTITER const end = list_end( arg1 );
-
-    if ( iter != end && list_next( iter ) != end && list_next( list_next( iter )
-        ) != end )
-    {
-        char const * const source = object_str( list_item( iter ) );
-        OBJECT * const pattern = list_item( list_next( iter ) );
-        regexp * const repat = regex_compile( pattern );
-
-        if ( regexec( repat, (char *)source) )
-        {
-            LISTITER subst = list_next( iter );
-
-            while ( ( subst = list_next( subst ) ) != end )
-            {
-#define BUFLEN 4096
-                char buf[ BUFLEN + 1 ];
-                char const * in = object_str( list_item( subst ) );
-                char * out = buf;
-
-                for ( ; *in && out < buf + BUFLEN; ++in )
-                {
-                    if ( *in == '\\' || *in == '$' )
-                    {
-                        ++in;
-                        if ( *in == 0 )
-                            break;
-                        if ( *in >= '0' && *in <= '9' )
-                        {
-                            unsigned int const n = *in - '0';
-                            size_t const srclen = repat->endp[ n ] -
-                                repat->startp[ n ];
-                            size_t const remaining = buf + BUFLEN - out;
-                            size_t const len = srclen < remaining
-                                ? srclen
-                                : remaining;
-                            memcpy( out, repat->startp[ n ], len );
-                            out += len;
-                            continue;
-                        }
-                        /* fall through and copy the next character */
-                    }
-                    *out++ = *in;
-                }
-                *out = 0;
-
-                result = list_push_back( result, object_new( buf ) );
-#undef BUFLEN
-            }
-        }
-    }
-
-    return result;
-}
-
-
-static void free_regex( void * xregex, void * data )
-{
-    regex_entry * const regex = (regex_entry *)xregex;
-    object_free( regex->pattern );
-    BJAM_FREE( regex->regex );
-}
-
-
-void regex_done()
-{
-    if ( regex_hash )
-    {
-        hashenumerate( regex_hash, free_regex, (void *)0 );
-        hashdone( regex_hash );
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/subst.h
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/subst.h b/ext/kenlm/jam-files/engine/subst.h
deleted file mode 100644
index 7dc09a6..0000000
--- a/ext/kenlm/jam-files/engine/subst.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*  Copyright 2001-2004 David Abrahams.
- *  Distributed under the Boost Software License, Version 1.0.
- *  (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
- */
-
-#ifndef SUBST_JG20120722_H
-#define SUBST_JG20120722_H
-
-#include "object.h"
-#include "regexp.h"
-
-regexp * regex_compile( OBJECT * pattern );
-
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/timestamp.c
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/timestamp.c b/ext/kenlm/jam-files/engine/timestamp.c
deleted file mode 100644
index 0d01698..0000000
--- a/ext/kenlm/jam-files/engine/timestamp.c
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
- *
- * This file is part of Jam - see jam.c for Copyright information.
- */
-
-/* This file is ALSO:
- * Copyright 2001-2004 David Abrahams.
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying file LICENSE_1_0.txt or
- * http://www.boost.org/LICENSE_1_0.txt)
- */
-
-/*
- * timestamp.c - get the timestamp of a file or archive member
- *
- * External routines:
- *  timestamp_from_path() - return timestamp for a path, if present
- *  timestamp_done()      - free timestamp tables
- *
- * Internal routines:
- *  time_enter()      - internal worker callback for scanning archives &
- *                      directories
- *  free_timestamps() - worker function for freeing timestamp table contents
- */
-
-#include "jam.h"
-#include "timestamp.h"
-
-#include "filesys.h"
-#include "hash.h"
-#include "object.h"
-#include "pathsys.h"
-#include "strings.h"
-
-
-/*
- * BINDING - all known files
- */
-
-typedef struct _binding
-{
-    OBJECT * name;
-    short flags;
-
-#define BIND_SCANNED  0x01  /* if directory or arch, has been scanned */
-
-    short progress;
-
-#define BIND_INIT     0  /* never seen */
-#define BIND_NOENTRY  1  /* timestamp requested but file never found */
-#define BIND_SPOTTED  2  /* file found but not timed yet */
-#define BIND_MISSING  3  /* file found but can not get timestamp */
-#define BIND_FOUND    4  /* file found and time stamped */
-
-    /* update time - cleared if the there is nothing to bind */
-    timestamp time;
-} BINDING;
-
-static struct hash * bindhash = 0;
-
-static void time_enter( void *, OBJECT *, int const found,
-    timestamp const * const );
-
-static char * time_progress[] =
-{
-    "INIT",
-    "NOENTRY",
-    "SPOTTED",
-    "MISSING",
-    "FOUND"
-};
-
-
-#ifdef OS_NT
-/*
- * timestamp_from_filetime() - Windows FILETIME --> timestamp conversion
- *
- * Lifted shamelessly from the CPython implementation.
- */
-
-void timestamp_from_filetime( timestamp * const t, FILETIME const * const ft )
-{
-    /* Seconds between 1.1.1601 and 1.1.1970 */
-    static __int64 const secs_between_epochs = 11644473600;
-
-    /* We can not simply cast and dereference a FILETIME, since it might not be
-     * aligned properly. __int64 type variables are expected to be aligned to an
-     * 8 byte boundary while FILETIME structures may be aligned to any 4 byte
-     * boundary. Using an incorrectly aligned __int64 variable may cause a
-     * performance penalty on some platforms or even exceptions on others
-     * (documented on MSDN).
-     */
-    __int64 in;
-    memcpy( &in, ft, sizeof( in ) );
-
-    /* FILETIME resolution: 100ns. */
-    timestamp_init( t, (time_t)( ( in / 10000000 ) - secs_between_epochs ),
-        (int)( in % 10000000 ) * 100 );
-}
-#endif  /* OS_NT */
-
-
-void timestamp_clear( timestamp * const time )
-{
-    time->secs = time->nsecs = 0;
-}
-
-
-int timestamp_cmp( timestamp const * const lhs, timestamp const * const rhs )
-{
-    return lhs->secs == rhs->secs
-        ? lhs->nsecs - rhs->nsecs
-        : lhs->secs - rhs->secs;
-}
-
-
-void timestamp_copy( timestamp * const target, timestamp const * const source )
-{
-    target->secs = source->secs;
-    target->nsecs = source->nsecs;
-}
-
-
-void timestamp_current( timestamp * const t )
-{
-#ifdef OS_NT
-    /* GetSystemTimeAsFileTime()'s resolution seems to be about 15 ms on Windows
-     * XP and under a millisecond on Windows 7.
-     */
-    FILETIME ft;
-    GetSystemTimeAsFileTime( &ft );
-    timestamp_from_filetime( t, &ft );
-#else  /* OS_NT */
-    timestamp_init( t, time( 0 ), 0 );
-#endif  /* OS_NT */
-}
-
-
-int timestamp_empty( timestamp const * const time )
-{
-    return !time->secs && !time->nsecs;
-}
-
-
-/*
- * timestamp_from_path() - return timestamp for a path, if present
- */
-
-void timestamp_from_path( timestamp * const time, OBJECT * const path )
-{
-    PROFILE_ENTER( timestamp );
-
-    PATHNAME f1;
-    PATHNAME f2;
-    int found;
-    BINDING * b;
-    string buf[ 1 ];
-
-
-    if ( file_time( path, time ) < 0 )
-        timestamp_clear( time );
-
-    PROFILE_EXIT( timestamp );
-}
-
-
-void timestamp_init( timestamp * const time, time_t const secs, int const nsecs
-    )
-{
-    time->secs = secs;
-    time->nsecs = nsecs;
-}
-
-
-void timestamp_max( timestamp * const max, timestamp const * const lhs,
-    timestamp const * const rhs )
-{
-    if ( timestamp_cmp( lhs, rhs ) > 0 )
-        timestamp_copy( max, lhs );
-    else
-        timestamp_copy( max, rhs );
-}
-
-
-static char const * timestamp_formatstr( timestamp const * const time,
-    char const * const format )
-{
-    static char result1[ 500 ];
-    static char result2[ 500 ];
-    strftime( result1, sizeof( result1 ) / sizeof( *result1 ), format, gmtime(
-        &time->secs ) );
-    sprintf( result2, result1, time->nsecs );
-    return result2;
-}
-
-
-char const * timestamp_str( timestamp const * const time )
-{
-    return timestamp_formatstr( time, "%Y-%m-%d %H:%M:%S.%%09d +0000" );
-}
-
-
-char const * timestamp_timestr( timestamp const * const time )
-{
-    return timestamp_formatstr( time, "%H:%M:%S.%%09d" );
-}
-
-
-/*
- * time_enter() - internal worker callback for scanning archives & directories
- */
-
-static void time_enter( void * closure, OBJECT * target, int const found,
-    timestamp const * const time )
-{
-    int item_found;
-    BINDING * b;
-    struct hash * const bindhash = (struct hash *)closure;
-
-    target = path_as_key( target );
-
-    b = (BINDING *)hash_insert( bindhash, target, &item_found );
-    if ( !item_found )
-    {
-        b->name = object_copy( target );
-        b->flags = 0;
-    }
-
-    timestamp_copy( &b->time, time );
-    b->progress = found ? BIND_FOUND : BIND_SPOTTED;
-
-    if ( DEBUG_BINDSCAN )
-        printf( "time ( %s ) : %s\n", object_str( target ), time_progress[
-            b->progress ] );
-
-    object_free( target );
-}
-
-
-/*
- * free_timestamps() - worker function for freeing timestamp table contents
- */
-
-static void free_timestamps( void * xbinding, void * data )
-{
-    object_free( ( (BINDING *)xbinding )->name );
-}
-
-
-/*
- * timestamp_done() - free timestamp tables
- */
-
-void timestamp_done()
-{
-    if ( bindhash )
-    {
-        hashenumerate( bindhash, free_timestamps, 0 );
-        hashdone( bindhash );
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/timestamp.h
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/timestamp.h b/ext/kenlm/jam-files/engine/timestamp.h
deleted file mode 100644
index aaf1310..0000000
--- a/ext/kenlm/jam-files/engine/timestamp.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Copyright 1993, 1995 Christopher Seiwald.
- *
- * This file is part of Jam - see jam.c for Copyright information.
- */
-
-/*
- * timestamp.h - get the timestamp of a file or archive member
- */
-
-#ifndef TIMESTAMP_H_SW_2011_11_18
-#define TIMESTAMP_H_SW_2011_11_18
-
-#include "object.h"
-
-#ifdef OS_NT
-# define WIN32_LEAN_AND_MEAN
-# include <windows.h>
-#endif
-
-#include <time.h>
-
-typedef struct timestamp
-{
-    time_t secs;
-    int nsecs;
-} timestamp;
-
-void timestamp_clear( timestamp * const );
-int timestamp_cmp( timestamp const * const lhs, timestamp const * const rhs );
-void timestamp_copy( timestamp * const target, timestamp const * const source );
-void timestamp_current( timestamp * const );
-int timestamp_empty( timestamp const * const );
-void timestamp_from_path( timestamp * const, OBJECT * const path );
-void timestamp_init( timestamp * const, time_t const secs, int const nsecs );
-void timestamp_max( timestamp * const max, timestamp const * const lhs,
-    timestamp const * const rhs );
-char const * timestamp_str( timestamp const * const );
-char const * timestamp_timestr( timestamp const * const );
-
-#ifdef OS_NT
-void timestamp_from_filetime( timestamp * const, FILETIME const * const );
-#endif
-
-void timestamp_done();
-
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/variable.c
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/variable.c b/ext/kenlm/jam-files/engine/variable.c
deleted file mode 100644
index 2c292fb..0000000
--- a/ext/kenlm/jam-files/engine/variable.c
+++ /dev/null
@@ -1,345 +0,0 @@
-/*
- * Copyright 1993, 2000 Christopher Seiwald.
- *
- * This file is part of Jam - see jam.c for Copyright information.
- */
-
-/* This file is ALSO:
- * Copyright 2001-2004 David Abrahams.
- * Copyright 2005 Reece H. Dunn.
- * Copyright 2005 Rene Rivera.
- * Distributed under the Boost Software License, Version 1.0.
- * (See accompanying file LICENSE_1_0.txt or copy at
- * http://www.boost.org/LICENSE_1_0.txt)
- */
-
-/*
- * variable.c - handle Jam multi-element variables.
- *
- * External routines:
- *
- *  var_defines() - load a bunch of variable=value settings
- *  var_get()     - get value of a user defined symbol
- *  var_set()     - set a variable in jam's user defined symbol table.
- *  var_swap()    - swap a variable's value with the given one
- *  var_done()    - free variable tables
- *
- * Internal routines:
- *
- *  var_enter() - make new var symbol table entry, returning var ptr
- *  var_dump()  - dump a variable to stdout
- */
-
-#include "jam.h"
-#include "variable.h"
-
-#include "filesys.h"
-#include "hash.h"
-#include "modules.h"
-#include "parse.h"
-#include "pathsys.h"
-#include "strings.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-
-
-/*
- * VARIABLE - a user defined multi-value variable
- */
-
-typedef struct _variable VARIABLE ;
-
-struct _variable
-{
-    OBJECT * symbol;
-    LIST   * value;
-};
-
-static LIST * * var_enter( struct module_t *, OBJECT * symbol );
-static void var_dump( OBJECT * symbol, LIST * value, char * what );
-
-
-/*
- * var_defines() - load a bunch of variable=value settings
- *
- * If preprocess is false, take the value verbatim.
- *
- * Otherwise, if the variable value is enclosed in quotes, strip the quotes.
- * Otherwise, if variable name ends in PATH, split value at :'s.
- * Otherwise, split the value at blanks.
- */
-
-void var_defines( struct module_t * module, char * const * e, int preprocess )
-{
-    string buf[ 1 ];
-
-    string_new( buf );
-
-    for ( ; *e; ++e )
-    {
-        char * val;
-
-        if ( ( val = strchr( *e, '=' ) )
-#if defined( OS_MAC )
-            /* On the mac (MPW), the var=val is actually var\0val */
-            /* Think different. */
-            || ( val = *e + strlen( *e ) )
-#endif
-        )
-        {
-            LIST * l = L0;
-            size_t const len = strlen( val + 1 );
-            int const quoted = ( val[ 1 ] == '"' ) && ( val[ len ] == '"' ) &&
-                ( len > 1 );
-
-            if ( quoted && preprocess )
-            {
-                string_append_range( buf, val + 2, val + len );
-                l = list_push_back( l, object_new( buf->value ) );
-                string_truncate( buf, 0 );
-            }
-            else
-            {
-                char * p;
-                char * pp;
-                char split =
-#if defined( OPT_NO_EXTERNAL_VARIABLE_SPLIT )
-                    '\0'
-#elif defined( OS_MAC )
-                    ','
-#else
-                    ' '
-#endif
-                    ;
-
-                /* Split *PATH at :'s, not spaces. */
-                if ( val - 4 >= *e )
-                {
-                    if ( !strncmp( val - 4, "PATH", 4 ) ||
-                        !strncmp( val - 4, "Path", 4 ) ||
-                        !strncmp( val - 4, "path", 4 ) )
-                        split = SPLITPATH;
-                }
-
-                /* Do the split. */
-                for
-                (
-                    pp = val + 1;
-                    preprocess && ( ( p = strchr( pp, split ) ) != 0 );
-                    pp = p + 1
-                )
-                {
-                    string_append_range( buf, pp, p );
-                    l = list_push_back( l, object_new( buf->value ) );
-                    string_truncate( buf, 0 );
-                }
-
-                l = list_push_back( l, object_new( pp ) );
-            }
-
-            /* Get name. */
-            string_append_range( buf, *e, val );
-            {
-                OBJECT * const varname = object_new( buf->value );
-                var_set( module, varname, l, VAR_SET );
-                object_free( varname );
-            }
-            string_truncate( buf, 0 );
-        }
-    }
-    string_free( buf );
-}
-
-
-/* Last returned variable value saved so we may clear it in var_done(). */
-static LIST * saved_var = L0;
-
-
-/*
- * var_get() - get value of a user defined symbol
- *
- * Returns NULL if symbol unset.
- */
-
-LIST * var_get( struct module_t * module, OBJECT * symbol )
-{
-    LIST * result = L0;
-#ifdef OPT_AT_FILES
-    /* Some "fixed" variables... */
-    if ( object_equal( symbol, constant_TMPDIR ) )
-    {
-        list_free( saved_var );
-        result = saved_var = list_new( object_new( path_tmpdir()->value ) );
-    }
-    else if ( object_equal( symbol, constant_TMPNAME ) )
-    {
-        list_free( saved_var );
-        result = saved_var = list_new( path_tmpnam() );
-    }
-    else if ( object_equal( symbol, constant_TMPFILE ) )
-    {
-        list_free( saved_var );
-        result = saved_var = list_new( path_tmpfile() );
-    }
-    else if ( object_equal( symbol, constant_STDOUT ) )
-    {
-        list_free( saved_var );
-        result = saved_var = list_new( object_copy( constant_STDOUT ) );
-    }
-    else if ( object_equal( symbol, constant_STDERR ) )
-    {
-        list_free( saved_var );
-        result = saved_var = list_new( object_copy( constant_STDERR ) );
-    }
-    else
-#endif
-    {
-        VARIABLE * v;
-        int n;
-
-        if ( ( n = module_get_fixed_var( module, symbol ) ) != -1 )
-        {
-            if ( DEBUG_VARGET )
-                var_dump( symbol, module->fixed_variables[ n ], "get" );
-            result = module->fixed_variables[ n ];
-        }
-        else if ( module->variables && ( v = (VARIABLE *)hash_find(
-            module->variables, symbol ) ) )
-        {
-            if ( DEBUG_VARGET )
-                var_dump( v->symbol, v->value, "get" );
-            result = v->value;
-        }
-    }
-    return result;
-}
-
-
-LIST * var_get_and_clear_raw( module_t * module, OBJECT * symbol )
-{
-    LIST * result = L0;
-    VARIABLE * v;
-
-    if ( module->variables && ( v = (VARIABLE *)hash_find( module->variables,
-        symbol ) ) )
-    {
-        result = v->value;
-        v->value = L0;
-    }
-
-    return result;
-}
-
-
-/*
- * var_set() - set a variable in Jam's user defined symbol table
- *
- * 'flag' controls the relationship between new and old values of the variable:
- * SET replaces the old with the new; APPEND appends the new to the old; DEFAULT
- * only uses the new if the variable was previously unset.
- *
- * Copies symbol. Takes ownership of value.
- */
-
-void var_set( struct module_t * module, OBJECT * symbol, LIST * value, int flag
-    )
-{
-    LIST * * v = var_enter( module, symbol );
-
-    if ( DEBUG_VARSET )
-        var_dump( symbol, value, "set" );
-
-    switch ( flag )
-    {
-    case VAR_SET:  /* Replace value */
-        list_free( *v );
-        *v = value;
-        break;
-
-    case VAR_APPEND:  /* Append value */
-        *v = list_append( *v, value );
-        break;
-
-    case VAR_DEFAULT:  /* Set only if unset */
-        if ( list_empty( *v ) )
-            *v = value;
-        else
-            list_free( value );
-        break;
-    }
-}
-
-
-/*
- * var_swap() - swap a variable's value with the given one
- */
-
-LIST * var_swap( struct module_t * module, OBJECT * symbol, LIST * value )
-{
-    LIST * * v = var_enter( module, symbol );
-    LIST * oldvalue = *v;
-    if ( DEBUG_VARSET )
-        var_dump( symbol, value, "set" );
-    *v = value;
-    return oldvalue;
-}
-
-
-/*
- * var_enter() - make new var symbol table entry, returning var ptr
- */
-
-static LIST * * var_enter( struct module_t * module, OBJECT * symbol )
-{
-    int found;
-    VARIABLE * v;
-    int n;
-
-    if ( ( n = module_get_fixed_var( module, symbol ) ) != -1 )
-        return &module->fixed_variables[ n ];
-
-    if ( !module->variables )
-        module->variables = hashinit( sizeof( VARIABLE ), "variables" );
-
-    v = (VARIABLE *)hash_insert( module->variables, symbol, &found );
-    if ( !found )
-    {
-        v->symbol = object_copy( symbol );
-        v->value = L0;
-    }
-
-    return &v->value;
-}
-
-
-/*
- * var_dump() - dump a variable to stdout
- */
-
-static void var_dump( OBJECT * symbol, LIST * value, char * what )
-{
-    printf( "%s %s = ", what, object_str( symbol ) );
-    list_print( value );
-    printf( "\n" );
-}
-
-
-/*
- * var_done() - free variable tables
- */
-
-static void delete_var_( void * xvar, void * data )
-{
-    VARIABLE * const v = (VARIABLE *)xvar;
-    object_free( v->symbol );
-    list_free( v->value );
-}
-
-void var_done( struct module_t * module )
-{
-    list_free( saved_var );
-    saved_var = L0;
-    hashenumerate( module->variables, delete_var_, 0 );
-    hash_free( module->variables );
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/variable.h
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/variable.h b/ext/kenlm/jam-files/engine/variable.h
deleted file mode 100644
index ddb452b..0000000
--- a/ext/kenlm/jam-files/engine/variable.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 1993, 2000 Christopher Seiwald.
- *
- * This file is part of Jam - see jam.c for Copyright information.
- */
-
-/*
- * variable.h - handle jam multi-element variables
- */
-
-#ifndef VARIABLE_SW20111119_H
-#define VARIABLE_SW20111119_H
-
-#include "lists.h"
-#include "object.h"
-
-
-struct module_t;
-
-void   var_defines( struct module_t *, char * const * e, int preprocess );
-LIST * var_get( struct module_t *, OBJECT * symbol );
-void   var_set( struct module_t *, OBJECT * symbol, LIST * value, int flag );
-LIST * var_swap( struct module_t *, OBJECT * symbol, LIST * value );
-void   var_done( struct module_t * );
-
-/*
- * Defines for var_set().
- */
-
-#define VAR_SET      0   /* override previous value */
-#define VAR_APPEND   1   /* append to previous value */
-#define VAR_DEFAULT  2   /* set only if no previous value */
-
-#endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/w32_getreg.c
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/w32_getreg.c b/ext/kenlm/jam-files/engine/w32_getreg.c
deleted file mode 100644
index dd2d0fc..0000000
--- a/ext/kenlm/jam-files/engine/w32_getreg.c
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
-Copyright Paul Lin 2003. Copyright 2006 Bojan Resnik.
-Distributed under the Boost Software License, Version 1.0. (See accompanying
-file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
-*/
-
-# include "jam.h"
-
-# if defined( OS_NT ) || defined( OS_CYGWIN )
-
-# include "lists.h"
-# include "object.h"
-# include "parse.h"
-# include "frames.h"
-# include "strings.h"
-
-# define WIN32_LEAN_AND_MEAN
-# include <windows.h>
-
-# define  MAX_REGISTRY_DATA_LENGTH 4096
-# define  MAX_REGISTRY_KEYNAME_LENGTH 256
-# define  MAX_REGISTRY_VALUENAME_LENGTH 16384
-
-typedef struct
-{
-    LPCSTR  name;
-    HKEY    value;
-} KeyMap;
-
-static const KeyMap dlRootKeys[] = {
-    { "HKLM", HKEY_LOCAL_MACHINE },
-    { "HKCU", HKEY_CURRENT_USER },
-    { "HKCR", HKEY_CLASSES_ROOT },
-    { "HKEY_LOCAL_MACHINE", HKEY_LOCAL_MACHINE },
-    { "HKEY_CURRENT_USER", HKEY_CURRENT_USER },
-    { "HKEY_CLASSES_ROOT", HKEY_CLASSES_ROOT },
-    { 0, 0 }
-};
-
-static HKEY get_key(char const** path)
-{
-    const KeyMap *p;
-
-    for (p = dlRootKeys; p->name; ++p)
-    {
-        int n = strlen(p->name);
-        if (!strncmp(*path,p->name,n))
-        {
-            if ((*path)[n] == '\\' || (*path)[n] == 0)
-            {
-                *path += n + 1;
-                break;
-            }
-        }
-    }
-
-    return p->value;
-}
-
-LIST * builtin_system_registry( FRAME * frame, int flags )
-{
-    char const* path = object_str( list_front( lol_get(frame->args, 0) ) );
-    LIST* result = L0;
-    HKEY key = get_key(&path);
-
-    if (
-        key != 0
-        && ERROR_SUCCESS == RegOpenKeyEx(key, path, 0, KEY_QUERY_VALUE, &key)
-    )
-    {
-        DWORD  type;
-        BYTE   data[MAX_REGISTRY_DATA_LENGTH];
-        DWORD  len = sizeof(data);
-        LIST * const field = lol_get(frame->args, 1);
-
-        if ( ERROR_SUCCESS ==
-             RegQueryValueEx(key, field ? object_str( list_front( field ) ) : 0, 0, &type, data, &len) )
-        {
-            switch (type)
-            {
-
-             case REG_EXPAND_SZ:
-                 {
-                     long len;
-                     string expanded[1];
-                     string_new(expanded);
-
-                     while (
-                         (len = ExpandEnvironmentStrings(
-                             (LPCSTR)data, expanded->value, expanded->capacity))
-                         > expanded->capacity
-                     )
-                         string_reserve(expanded, len);
-
-                     expanded->size = len - 1;
-
-                     result = list_push_back( result, object_new(expanded->value) );
-                     string_free( expanded );
-                 }
-                 break;
-
-             case REG_MULTI_SZ:
-                 {
-                     char* s;
-
-                     for (s = (char*)data; *s; s += strlen(s) + 1)
-                         result = list_push_back( result, object_new(s) );
-
-                 }
-                 break;
-
-             case REG_DWORD:
-                 {
-                     char buf[100];
-                     sprintf( buf, "%u", *(PDWORD)data );
-                     result = list_push_back( result, object_new(buf) );
-                 }
-                 break;
-
-             case REG_SZ:
-                 result = list_push_back( result, object_new( (const char *)data ) );
-                 break;
-            }
-        }
-        RegCloseKey(key);
-    }
-    return  result;
-}
-
-static LIST* get_subkey_names(HKEY key, char const* path)
-{
-    LIST* result = 0;
-
-    if ( ERROR_SUCCESS ==
-         RegOpenKeyEx(key, path, 0, KEY_ENUMERATE_SUB_KEYS, &key)
-    )
-    {
-        char name[MAX_REGISTRY_KEYNAME_LENGTH];
-        DWORD name_size = sizeof(name);
-        DWORD index;
-        FILETIME last_write_time;
-
-        for ( index = 0;
-              ERROR_SUCCESS == RegEnumKeyEx(
-                  key, index, name, &name_size, 0, 0, 0, &last_write_time);
-              ++index,
-              name_size = sizeof(name)
-        )
-        {
-            name[name_size] = 0;
-            result = list_append(result, list_new(object_new(name)));
-        }
-
-        RegCloseKey(key);
-    }
-
-    return result;
-}
-
-static LIST* get_value_names(HKEY key, char const* path)
-{
-    LIST* result = 0;
-
-    if ( ERROR_SUCCESS == RegOpenKeyEx(key, path, 0, KEY_QUERY_VALUE, &key) )
-    {
-        char name[MAX_REGISTRY_VALUENAME_LENGTH];
-        DWORD name_size = sizeof(name);
-        DWORD index;
-
-        for ( index = 0;
-              ERROR_SUCCESS == RegEnumValue(
-                  key, index, name, &name_size, 0, 0, 0, 0);
-              ++index,
-              name_size = sizeof(name)
-        )
-        {
-            name[name_size] = 0;
-            result = list_append(result, list_new(object_new(name)));
-        }
-
-        RegCloseKey(key);
-    }
-
-    return result;
-}
-
-LIST * builtin_system_registry_names( FRAME * frame, int flags )
-{
-    char const* path        = object_str( list_front( lol_get(frame->args, 0) ) );
-    char const* result_type = object_str( list_front( lol_get(frame->args, 1) ) );
-
-    HKEY key = get_key(&path);
-
-    if ( !strcmp(result_type, "subkeys") )
-        return get_subkey_names(key, path);
-    if ( !strcmp(result_type, "values") )
-        return get_value_names(key, path);
-    return 0;
-}
-
-# endif

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/engine/yyacc.c
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/engine/yyacc.c b/ext/kenlm/jam-files/engine/yyacc.c
deleted file mode 100644
index b5efc96..0000000
--- a/ext/kenlm/jam-files/engine/yyacc.c
+++ /dev/null
@@ -1,268 +0,0 @@
-/* Copyright 2002 Rene Rivera.
-** Distributed under the Boost Software License, Version 1.0.
-** (See accompanying file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
-*/
-
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-#include <stdlib.h>
-
-/*
-# yyacc - yacc wrapper
-#
-# Allows tokens to be written as `literal` and then automatically
-# substituted with #defined tokens.
-#
-# Usage:
-#    yyacc file.y filetab.h file.yy
-#
-# inputs:
-#    file.yy        yacc grammar with ` literals
-#
-# outputs:
-#    file.y        yacc grammar
-#    filetab.h    array of string <-> token mappings
-#
-# 3-13-93
-#    Documented and p moved in sed command (for some reason,
-#    s/x/y/p doesn't work).
-# 10-12-93
-#    Take basename as second argument.
-# 12-31-96
-#    reversed order of args to be compatible with GenFile rule
-# 11-20-2002
-#    Reimplemented as a C program for portability. (Rene Rivera)
-*/
-
-void print_usage();
-char * copy_string(char * s, int l);
-char * tokenize_string(char * s);
-int cmp_literal(const void * a, const void * b);
-
-typedef struct
-{
-    char * string;
-    char * token;
-} literal;
-
-int main(int argc, char ** argv)
-{
-    int result = 0;
-    if (argc != 4)
-    {
-        print_usage();
-        result = 1;
-    }
-    else
-    {
-        FILE * token_output_f = 0;
-        FILE * grammar_output_f = 0;
-        FILE * grammar_source_f = 0;
-
-        grammar_source_f = fopen(argv[3],"r");
-        if (grammar_source_f == 0) { result = 1; }
-        if (result == 0)
-        {
-            literal literals[1024];
-            int t = 0;
-            char l[2048];
-            while (1)
-            {
-                if (fgets(l,2048,grammar_source_f) != 0)
-                {
-                    char * c = l;
-                    while (1)
-                    {
-                        char * c1 = strchr(c,'`');
-                        if (c1 != 0)
-                        {
-                            char * c2 = strchr(c1+1,'`');
-                            if (c2 != 0)
-                            {
-                                literals[t].string = copy_string(c1+1,c2-c1-1);
-                                literals[t].token = tokenize_string(literals[t].string);
-                                t += 1;
-                                c = c2+1;
-                            }
-                            else
-                                break;
-                        }
-                        else
-                            break;
-                    }
-                }
-                else
-                {
-                    break;
-                }
-            }
-            literals[t].string = 0;
-            literals[t].token = 0;
-            qsort(literals,t,sizeof(literal),cmp_literal);
-            {
-                int p = 1;
-                int i = 1;
-                while (literals[i].string != 0)
-                {
-                    if (strcmp(literals[p-1].string,literals[i].string) != 0)
-                    {
-                        literals[p] = literals[i];
-                        p += 1;
-                    }
-                    i += 1;
-                }
-                literals[p].string = 0;
-                literals[p].token = 0;
-                t = p;
-            }
-            token_output_f = fopen(argv[2],"w");
-            if (token_output_f != 0)
-            {
-                int i = 0;
-                while (literals[i].string != 0)
-                {
-                    fprintf(token_output_f,"    { \"%s\", %s },\n",literals[i].string,literals[i].token);
-                    i += 1;
-                }
-                fclose(token_output_f);
-            }
-            else
-                result = 1;
-            if (result == 0)
-            {
-                grammar_output_f = fopen(argv[1],"w");
-                if (grammar_output_f != 0)
-                {
-                    int i = 0;
-                    while (literals[i].string != 0)
-                    {
-                        fprintf(grammar_output_f,"%%token %s\n",literals[i].token);
-                        i += 1;
-                    }
-                    rewind(grammar_source_f);
-                    while (1)
-                    {
-                        if (fgets(l,2048,grammar_source_f) != 0)
-                        {
-                            char * c = l;
-                            while (1)
-                            {
-                                char * c1 = strchr(c,'`');
-                                if (c1 != 0)
-                                {
-                                    char * c2 = strchr(c1+1,'`');
-                                    if (c2 != 0)
-                                    {
-                                        literal key;
-                                        literal * replacement = 0;
-                                        key.string = copy_string(c1+1,c2-c1-1);
-                                        key.token = 0;
-                                        replacement = (literal*)bsearch(
-                                            &key,literals,t,sizeof(literal),cmp_literal);
-                                        *c1 = 0;
-                                        fprintf(grammar_output_f,"%s%s",c,replacement->token);
-                                        c = c2+1;
-                                    }
-                                    else
-                                    {
-                                        fprintf(grammar_output_f,"%s",c);
-                                        break;
-                                    }
-                                }
-                                else
-                                {
-                                    fprintf(grammar_output_f,"%s",c);
-                                    break;
-                                }
-                            }
-                        }
-                        else
-                        {
-                            break;
-                        }
-                    }
-                    fclose(grammar_output_f);
-                }
-                else
-                    result = 1;
-            }
-        }
-        if (result != 0)
-        {
-            perror("yyacc");
-        }
-    }
-    return result;
-}
-
-static char * usage[] = {
-    "yyacc <grammar output.y> <token table output.h> <grammar source.yy>",
-    0 };
-
-void print_usage()
-{
-    char ** u;
-    for (u = usage; *u != 0; ++u)
-    {
-        fputs(*u,stderr); putc('\n',stderr);
-    }
-}
-
-char * copy_string(char * s, int l)
-{
-    char * result = (char*)malloc(l+1);
-    strncpy(result,s,l);
-    result[l] = 0;
-    return result;
-}
-
-char * tokenize_string(char * s)
-{
-    char * result;
-    char * literal = s;
-    int l;
-    int c;
-
-    if (strcmp(s,":") == 0) literal = "_colon";
-    else if (strcmp(s,"!") == 0) literal = "_bang";
-    else if (strcmp(s,"!=") == 0) literal = "_bang_equals";
-    else if (strcmp(s,"&&") == 0) literal = "_amperamper";
-    else if (strcmp(s,"&") == 0) literal = "_amper";
-    else if (strcmp(s,"+") == 0) literal = "_plus";
-    else if (strcmp(s,"+=") == 0) literal = "_plus_equals";
-    else if (strcmp(s,"||") == 0) literal = "_barbar";
-    else if (strcmp(s,"|") == 0) literal = "_bar";
-    else if (strcmp(s,";") == 0) literal = "_semic";
-    else if (strcmp(s,"-") == 0) literal = "_minus";
-    else if (strcmp(s,"<") == 0) literal = "_langle";
-    else if (strcmp(s,"<=") == 0) literal = "_langle_equals";
-    else if (strcmp(s,">") == 0) literal = "_rangle";
-    else if (strcmp(s,">=") == 0) literal = "_rangle_equals";
-    else if (strcmp(s,".") == 0) literal = "_period";
-    else if (strcmp(s,"?") == 0) literal = "_question";
-    else if (strcmp(s,"?=") == 0) literal = "_question_equals";
-    else if (strcmp(s,"=") == 0) literal = "_equals";
-    else if (strcmp(s,",") == 0) literal = "_comma";
-    else if (strcmp(s,"[") == 0) literal = "_lbracket";
-    else if (strcmp(s,"]") == 0) literal = "_rbracket";
-    else if (strcmp(s,"{") == 0) literal = "_lbrace";
-    else if (strcmp(s,"}") == 0) literal = "_rbrace";
-    else if (strcmp(s,"(") == 0) literal = "_lparen";
-    else if (strcmp(s,")") == 0) literal = "_rparen";
-    l = strlen(literal)+2;
-    result = (char*)malloc(l+1);
-    for (c = 0; literal[c] != 0; ++c)
-    {
-        result[c] = toupper(literal[c]);
-    }
-    result[l-2] = '_';
-    result[l-1] = 't';
-    result[l] = 0;
-    return result;
-}
-
-int cmp_literal(const void * a, const void * b)
-{
-    return strcmp(((const literal *)a)->string,((const literal *)b)->string);
-}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/fail/Jamroot
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/fail/Jamroot b/ext/kenlm/jam-files/fail/Jamroot
deleted file mode 100644
index c3584d8..0000000
--- a/ext/kenlm/jam-files/fail/Jamroot
+++ /dev/null
@@ -1,4 +0,0 @@
-actions fail {
-  false
-}
-make fail : : fail ;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/jam-files/sanity.jam
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/jam-files/sanity.jam b/ext/kenlm/jam-files/sanity.jam
deleted file mode 100644
index 1851ece..0000000
--- a/ext/kenlm/jam-files/sanity.jam
+++ /dev/null
@@ -1,344 +0,0 @@
-import modules ;
-import option ;
-import os ;
-import path ;
-import project ;
-import build-system ;
-import version ;
-
-# Helpers for running shell commands with the trailing newline removed; see
-# http://lists.boost.org/boost-build/2007/08/17051.php
-#
-# trim-nl returns the part of str matched by "([^<newline>]*)" followed by
-# extras unchanged.  The line break inside the quoted pattern below is
-# deliberate: it is the newline character of the character class.
-rule trim-nl ( str extras * ) {
-return [ MATCH "([^
-]*)" : $(str) ] $(extras) ;
-}
-# Run cmd with SHELL, forwarding extras as SHELL options, and return the
-# result after passing it through trim-nl.
-rule _shell ( cmd : extras * ) {
-  return [ trim-nl [ SHELL $(cmd) : $(extras) ] ] ;
-}
-
-# Run cmd and abort the build with status 1 if its exit status is not 0.
-# The command's output is not returned.
-rule shell_or_fail ( cmd ) {
-  # With the exit-status option, ret[1] is the output and ret[2] the status.
-  local ret = [ SHELL $(cmd) : exit-status ] ;
-  if $(ret[2]) != 0 {
-    exit $(cmd) failed : 1 ;
-  }
-}
-
-# Run cmd and abort the build with status 1 if its exit status is not 0;
-# otherwise return the command's output passed through trim-nl.
-rule shell_or_die ( cmd ) {
-  # With the exit-status option, ret[1] is the output and ret[2] the status.
-  local ret = [ SHELL $(cmd) : exit-status ] ;
-  if $(ret[2]) != 0 {
-    exit $(cmd) failed : 1 ;
-  }
-  return [ trim-nl $(ret[1]) ] ;
-}
-
-# Compiler and linker flags taken from the environment.  They are used by
-# test_flags and added to the project requirements further down.
-cxxflags = [ os.environ "CXXFLAGS" ] ;
-cflags = [ os.environ "CFLAGS" ] ;
-ldflags = [ os.environ "LDFLAGS" ] ;
-
-# Run g++ on a tiny program with the given arguments to see if it passes.
-#   flags: extra g++ arguments; $(cxxflags) and $(ldflags) are put in front.
-#   main:  program text fed to g++ on stdin; defaults to an empty main().
-# Returns true when the command exits with status 0, and nothing otherwise.
-# The binary is written to $(TOP)/dummy and removed again on success.
-rule test_flags ( flags * : main ? ) {
-  flags = $(cxxflags) $(ldflags) $(flags) ;
-  if ! $(main) {
-    main = "int main() {}" ;
-  }
-  # The program is passed with a here-string (<<<), hence the explicit bash.
-  local cmd = "bash -c \"g++ "$(flags:J=" ")" -x c++ - <<<'$(main)' -o $(TOP)/dummy >/dev/null 2>/dev/null && rm $(TOP)/dummy 2>/dev/null\"" ;
-  local ret = [ SHELL $(cmd) : exit-status ] ;
-  # With --debug-configuration, show the command and what SHELL returned.
-  if --debug-configuration in [ modules.peek : ARGV ] {
-    echo $(cmd) ;
-    echo $(ret) ;
-  }
-  if $(ret[2]) = 0 {
-    return true ;
-  } else {
-    return ;
-  }
-}
-
-# Return true if g++ can compile the empty test program with header `name`
-# force-included (-include), and nothing otherwise.
-rule test_header ( name ) {
-  return [ test_flags "-include $(name)" ] ;
-}
-
-# Project-wide requirements, accumulated throughout this file.
-requirements = ;
-
-# --static on the command line (implied value "yes") requests static linking
-# of both libraries and the runtime.
-FORCE-STATIC = [ option.get "static" : : "yes" ] ;
-if $(FORCE-STATIC) {
-  requirements += <link>static <runtime-link>static ;
-}
-
-# Return true if the test program links against library `name`.  When
-# FORCE-STATIC is set, the library is requested between -Wl,-Bstatic and
-# -Wl,-Bdynamic so that only its static version is accepted.
-rule test_library ( name ) {
-  if $(FORCE-STATIC) {
-    return [ test_flags "-Wl,-Bstatic -l$(name) -Wl,-Bdynamic" ] ;
-  } else {
-    return [ test_flags "-l$(name)" ] ;
-  }
-}
-
-# Define the constant CLEANING.  It is yes when --clean is given or when a
-# bare "clean" argument is on the command line; otherwise it takes the value
-# of --clean-all, which defaults to no.
-{
-  local cleaning = [ option.get "clean" : : yes ] ;
-  cleaning ?= [ option.get "clean-all" : no : yes ] ;
-  if "clean" in [ modules.peek : ARGV ] {
-    cleaning = yes ;
-  }
-  constant CLEANING : $(cleaning) ;
-}
-
-# Set shared-command-line to <link>shared if the literal argument
-# "link=shared" appears anywhere on the command line; leave it empty otherwise.
-shared-command-line = ;
-local argv = [ modules.peek : ARGV ] ;
-while $(argv) {
-  if $(argv[1]) = "link=shared" {
-    shared-command-line = <link>shared ;
-  }
-  # Drop the argument just examined.
-  argv = $(argv[2-]) ;
-}
-
-# Determine if a library can be linked statically.
-#   name:       library name as passed to -l.
-#   additional: extra flags for the link test (e.g. -L search paths).
-# Returns "<link>shared" when link=shared was given on the command line, or
-# when the static link test fails and static linking is not being forced.
-# Returns nothing when the static link test succeeds, or when it fails but
-# FORCE-STATIC is set (a warning is printed in that case).
-rule auto-shared ( name : additional * ) {
-
-  additional ?= "" ;
-  if $(shared-command-line) = "<link>shared" {
-    return "<link>shared" ;
-  } else {
-    if [ test_flags $(additional)" -Wl,-Bstatic -l"$(name)" -Wl,-Bdynamic" ] {
-      return ;
-    } else {
-      if $(FORCE-STATIC) {
-        echo "Could not statically link against lib $(name).  Your build will probably fail." ;
-        return ;
-      } else {
-        return "<link>shared" ;
-      }
-    }
-  }
-}
-
-# Work out where to look for Boost.  The L-/I- variables hold raw g++ flags
-# for the shell tests; boost-search and boost-include hold the matching
-# Boost.Build properties.
-#
-# MacPorts' default location is /opt/local -- use this if no path is given.
-with-macports = [ option.get "with-macports" : : "/opt/local" ] ;
-if $(with-macports) {
-  using darwin ;
-  ECHO "Using --with-macports=$(with-macports), implying use of darwin GCC" ;
-
-  L-boost-search = -L$(with-macports)/lib ;
-  boost-search = <search>$(with-macports)/lib ;
-  I-boost-include = -I$(with-macports)/include ;
-  boost-include = <include>$(with-macports)/include ;
-  requirements += $(boost-include) ;
-} else {
-  # --with-boost takes precedence over the BOOST_ROOT environment variable.
-  with-boost = [ option.get "with-boost" ] ;
-  with-boost ?= [ os.environ "BOOST_ROOT" ] ;
-  if $(with-boost) {
-    # Search both lib and lib64 under the given root.
-    L-boost-search = -L$(with-boost)/lib" "-L$(with-boost)/lib64 ;
-    boost-search = <search>$(with-boost)/lib <search>$(with-boost)/lib64 ;
-    I-boost-include = -I$(with-boost)/include ;
-    boost-include = <include>$(with-boost)/include ;
-    requirements += $(boost-include) ;
-  } else {
-    # No location given: rely on the compiler's default search paths.
-    L-boost-search = "" ;
-    boost-search = ;
-    I-boost-include = "" ;
-    boost-include = ;
-  }
-}
- 
-# Convenience rule for boost libraries.  Defines library boost_$(name).
-#   name:  library name without the boost_ prefix.
-#   macro: suffix of the BOOST_* macro that dependents must define when the
-#          shared version is used.
-#   deps:  other targets this library depends on.
-# boost_$(name) is an alias that picks the static or the shared variant
-# depending on whether the empty_test_shared / empty_test_static probe
-# targets (declared in rule boost) build.
-rule boost-lib ( name macro : deps * ) {
-  lib boost_$(name)_static : $(deps) : $(boost-search) <name>boost_$(name)$(boost-lib-version) <link>static ;
-  lib boost_$(name)_shared : $(deps) : $(boost-search) <name>boost_$(name)$(boost-lib-version) <link>shared : : <define>BOOST_$(macro) ;
-
-  # Follow the build's <link> setting.
-  alias boost_$(name)_default : $(deps) : <link>static:<source>boost_$(name)_static <link>shared:<source>boost_$(name)_shared ;
-
-  # If the shared probe builds, follow <link>; otherwise use the static lib.
-  alias boost_$(name)_static_works : $(deps) : [ check-target-builds empty_test_shared "Shared Boost" : <source>boost_$(name)_default : <source>boost_$(name)_static ] ;
-  # If the static probe builds, use the alias above; otherwise use the shared lib.
-  alias boost_$(name) : $(deps) : [ check-target-builds empty_test_static "Static Boost" : <source>boost_$(name)_static_works : <source>boost_$(name)_shared ] ;
-}
-
-# Locate Boost, enforce a minimum version and declare the Boost library
-# targets used by the rest of the build.
-#   min-version: lowest acceptable BOOST_VERSION value, e.g. 103600.
-# The version checks are skipped when CLEANING is not "no".
-rule boost ( min-version ) {
-  # Dump the BOOST_* macros that boost/version.hpp defines.
-  local cmd = "bash -c \"g++ "$(I-boost-include)" -dM -x c++ -E /dev/null -include boost/version.hpp 2>/dev/null |grep '#define BOOST_'\"" ;
-  local boost-shell = [ SHELL "$(cmd)" : exit-status ] ;
-  if $(boost-shell[2]) != 0 && $(CLEANING) = no {
-    echo Failed to run "$(cmd)" ;
-    exit Boost does not seem to be installed or g++ is confused. : 1 ;
-  }
-  constant BOOST-VERSION : [ MATCH "#define BOOST_VERSION ([0-9]*)" : $(boost-shell[1]) ] ;
-  if $(BOOST-VERSION) < $(min-version) && $(CLEANING) = no {
-    exit You have Boost $(BOOST-VERSION).  This package requires Boost at least $(min-version) (and preferably newer). : 1 ;
-  }
-  # If libraries tagged with the version (e.g. -1_55) exist, use that suffix;
-  # otherwise use the untagged names.
-  boost-lib-version = [ MATCH "#define BOOST_LIB_VERSION \"([^\"]*)\"" : $(boost-shell[1]) ] ;
-  if [ test_flags $(L-boost-search)" -lboost_program_options-"$(boost-lib-version) ] {
-    boost-lib-version = "-"$(boost-lib-version) ;
-  } else {
-    boost-lib-version = "" ;
-  }
-
-  #Crazy amount of testing to make sure that BOOST_TEST_DYN_LINK is defined properly.
-  # Probe targets: an empty unit test built against the static and against
-  # the shared test framework.  boost-lib checks whether they build.
-  lib boost_unit_test_framework_static_test : : $(boost-search) <name>boost_unit_test_framework$(boost-lib-version) <link>static ;
-  obj empty_test_static.o : jam-files/empty_test_main.cc boost_unit_test_framework_static_test : $(boost-include) ;
-  exe empty_test_static : empty_test_static.o boost_unit_test_framework_static_test ;
-
-  lib boost_unit_test_framework_shared_test : : $(boost-search) <name>boost_unit_test_framework$(boost-lib-version) <link>shared : : <define>BOOST_TEST_DYN_LINK ;
-  obj empty_test_shared.o : jam-files/empty_test_main.cc boost_unit_test_framework_shared_test : $(boost-include) ;
-  exe empty_test_shared : empty_test_shared.o boost_unit_test_framework_shared_test ;
-
-  # Build the probes only when something asks for them.
-  explicit empty_test_static.o empty_test_static empty_test_shared.o empty_test_shared ;
-
-
-  #See tools/build/v2/contrib/boost.jam in a boost distribution for a table of macros to define.
-  boost-lib system SYSTEM_DYN_LINK ;
-  boost-lib thread THREAD_DYN_DLL : boost_system ;
-  boost-lib program_options PROGRAM_OPTIONS_DYN_LINK ;
-  boost-lib iostreams IOSTREAMS_DYN_LINK ;
-  boost-lib filesystem FILE_SYSTEM_DYN_LINK ;
-  boost-lib unit_test_framework TEST_DYN_LINK ;
-#  if $(BOOST-VERSION) >= 104800 {
-#    boost-lib chrono CHRONO_DYN_LINK ;
-#    boost-lib timer TIMER_DYN_LINK : boost_chrono ;
-#  }
-}
- 
-# Link normally to a library, but sometimes static isn't installed so fall
-# back to dynamic (the decision is made by auto-shared).
-#   name:        library name.
-#   search-path: directories to search, used both for the link test (-L) and
-#                as <search> properties.
-#   deps:        targets attached as <use> properties.
-rule external-lib ( name : search-path * : deps * ) {
-  lib $(name) : : [ auto-shared $(name) : "-L"$(search-path) ] <search>$(search-path) <use>$(deps) ;
-}
-
-# Write the current command line to $(TOP)/previous.sh as a small /bin/sh
-# script.  This does not do shell escaping.
-{
-  local build-log = $(TOP)/previous.sh ;
-  # Create the file and mark it executable the first time through.
-  if ! [ path.exists $(build-log) ] {
-    SHELL "touch \"$(build-log)\" && chmod +x \"$(build-log)\"" ;
-  }
-  local script = [ modules.peek : ARGV ] ;
-  if $(script[1]) = "./jam-files/bjam" {
-    #The ./bjam shell script calls ./jam-files/bjam so that appears in argv but
-    #we want ./bjam to appear so the environment variables are set correctly.
-    script = "./bjam "$(script[2-]:J=" ") ;
-  } else {
-    script = $(script:J=" ") ;
-  }
-  script = "#!/bin/sh\n$(script)\n" ;
-  # The @(file:E=contents) expansion writes the file as a side effect; the
-  # value it expands to is not needed.
-  local ignored = @($(build-log):E=$(script)) ;
-}
-
-# Pass the environment's compiler/linker flags to every target.
-# Boost jam's static clang for Linux is buggy, so clang on Linux links shared.
-requirements += <cxxflags>$(cxxflags) <cflags>$(cflags) <linkflags>$(ldflags) <os>LINUX,<toolset>clang:<link>shared ;
-
-# Skipped when --without-libsegfault is given or static linking is forced.
-if ! [ option.get "without-libsegfault" : : "yes" ] && ! $(FORCE-STATIC) {
-  #libSegFault prints a stack trace on segfault.  Link against it if available.
-  if [ test_flags "-lSegFault" ] {
-    external-lib SegFault ;
-    requirements += <library>SegFault ;
-  }
-}
-
-# With --git, GITTAG is "/" followed by the first 7 characters of the current
-# HEAD commit hash; otherwise it is empty.  It is appended to the install
-# prefix below.
-if [ option.get "git" : : "yes" ] {
-  local revision = [ _shell "git rev-parse --verify HEAD |head -c 7" ] ;
-  constant GITTAG : "/"$(revision) ;
-} else {
-  constant GITTAG : "" ;
-}
-
-# Install prefix: --prefix (rooted at the current directory if relative) or
-# $(TOP) when no prefix is given, with GITTAG appended in both cases.
-local prefix = [ option.get "prefix" ] ;
-if $(prefix) {
-  prefix = [ path.root $(prefix) [ path.pwd ] ] ;
-  prefix = $(prefix)$(GITTAG) ;
-} else {
-  prefix = $(TOP)$(GITTAG) ;
-}
-
-path-constant PREFIX : $(prefix) ;
-
-# --bindir and --libdir override the default locations under PREFIX.
-path-constant BINDIR : [ option.get "bindir" : $(PREFIX)/bin ] ;
-path-constant LIBDIR : [ option.get "libdir" : $(PREFIX)/lib ] ;
-# Declare install targets for deps and their dependencies: prefix-bin copies
-# executables to BINDIR and prefix-lib copies libraries to LIBDIR.  Shared
-# builds get LIBDIR as <dll-path>.
-rule install-bin-libs ( deps * ) {
-  install prefix-bin : $(deps) : <location>$(BINDIR) <install-dependencies>on <install-type>EXE <link>shared:<dll-path>$(LIBDIR) ;
-  install prefix-lib : $(deps) : <location>$(LIBDIR) <install-dependencies>on <install-type>LIB <link>shared:<dll-path>$(LIBDIR) ;
-}
-# Declare install target `name` that copies the files in `list` to the
-# directory given by --includedir, keeping their paths relative to
-# source-root (default ".").
-# NOTE(review): the default uses the lower-case $(prefix) variable rather than
-# the PREFIX constant used for BINDIR/LIBDIR -- confirm $(prefix) is still set
-# when this rule is invoked.
-rule install-headers ( name : list * : source-root ? ) {
-  local includedir = [ option.get "includedir" : $(prefix)/include ] ;
-  source-root ?= "." ;
-  install $(name) : $(list) : <location>$(includedir) <install-source-root>$(source-root) ;
-}
-
-# Call build-project once for every entry in projects.
-rule build-projects ( projects * ) {
-  for local p in $(projects) {
-    build-project $(p) ;
-  }
-}
-
-# The build system accepts only one post-build hook.  To allow several, a
-# single dispatcher (post-build) is registered and it calls every rule whose
-# name was added with add-post-hook.
-post-hooks = ;
-# Dispatcher: invoke each registered hook with the build result `ok`.
-rule post-build ( ok ? ) {
-  for local r in $(post-hooks) {
-    $(r) $(ok) ;
-  }
-}
-# Export the dispatcher to the global module under a qualified name and
-# register that name as the build system's post-build hook.
-IMPORT $(__name__) : post-build : : $(__name__).post-build ;
-build-system.set-post-build-hook $(__name__).post-build ;
-# Register additional rules to be run after the build.
-rule add-post-hook ( names * ) {
-  post-hooks += $(names) ;
-}
-
-# Post-build hook that reports the outcome of the build.  Prints SUCCESS when
-# ok is "ok".  Otherwise it prints instructions for getting support (worded
-# differently when --debug-configuration was already given) followed by ERROR.
-rule failure-message ( ok ? ) {
-  if $(ok) = "ok" {
-    echo "SUCCESS" ;
-  } else {
-    local argv = [ modules.peek : ARGV ] ;
-    # The command line as a single space-separated string.
-    local command-line = $(argv:J=" ") ;
-    if --debug-configuration in $(argv) {
-      echo "The build failed with command line: " ;
-      echo "  $(command-line)" ;
-      echo "If you need support, attach the full output to your e-mail." ;
-    } else {
-      echo "The build failed.  If you need support, run:" ;
-      echo "  $(command-line) --debug-configuration -d2 |gzip >build.log.gz" ;
-      echo "then attach build.log.gz to your e-mail." ;
-    }
-    echo "ERROR" ;
-  }
-}
-add-post-hook failure-message ;
-
-# Free feature carrying the text that the write-options action writes out.
-import feature : feature ;
-feature options-to-write : : free ;
-import toolset : flags ;
-# Expose the <options-to-write> property values to the action as the
-# OPTIONS-TO-WRITE variable.
-flags write-options OPTIONS-TO-WRITE <options-to-write> ;
-# Overwrite the target file ($(<)) with the option text.
-actions write-options {
-  echo "$(OPTIONS-TO-WRITE)" > $(<) ;
-}
-
-# Compare contents of file with current.  If they're different (or the file
-# does not exist), declare a target that rewrites the file with current via
-# the write-options action and mark it to always be rebuilt.  This file can
-# then be used with <dependency>$(file) to force recompilation.
-#   file:    path of the file that records the setting.
-#   current: the value the file should contain.
-rule update-if-changed ( file current ) {
-  if ( ! [ path.exists $(file) ] ) || ( [ _shell "cat $(file)" ] != $(current) ) {
-    make $(file) : : $(__name__).write-options : <options-to-write>$(current) ;
-    always $(file) ;
-  }
-}
-
-# With --sanity-test, only check the Jam version and exit immediately:
-# prints "Sane" with status 0 when the check passes, "Bad" with status 1
-# otherwise.
-# NOTE(review): Jam list subscripts normally start at 1; confirm that
-# $(current_version[0]) is what was intended here.
-if [ option.get "sanity-test" : : "yes" ] {
-  local current_version = [ modules.peek : JAM_VERSION ] ;
-  if ( $(current_version[0]) < 2000 && [ version.check-jam-version 3 1 16 ] ) || [ version.check-jam-version 2011 0 0 ] {
-    EXIT "Sane" : 0 ;
-  } else {
-    EXIT "Bad" : 1 ;
-  }
-}
-
-# Hack to act like alias in the sense that no lib is built, but only build
-# cpp files once.
-#   name:               name of the resulting alias target.
-#   deps:               sources; entries of type CPP are compiled, everything
-#                       else is passed through as a dependency.
-#   requirements, default-build, usage-requirements: forwarded to both the
-#                       per-file obj targets and the alias.
-import type ;
-rule fakelib ( name : deps * : requirements * : default-build * : usage-requirements * ) {
-  # Split deps into C++ sources and everything else.
-  local c-files = ;
-  local real-deps = ;
-  for local c in $(deps) {
-    if [ type.type $(c) ] = CPP {
-      c-files += $(c) ;
-    } else {
-      real-deps += $(c) ;
-    }
-  }
-  # One object target per source, named after the file's base name.
-  for local c in $(c-files) {
-    # Forward the caller's usage requirements to each object target as well.
-    obj $(c:B).o : $(c) $(real-deps) : $(requirements) : $(default-build) : $(usage-requirements) ;
-  }
-  alias $(name) : $(c-files:B).o $(real-deps) : $(requirements) : $(default-build) : $(usage-requirements) ;
-}
-
-use-project /top : . ;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/lm/CMakeLists.txt b/ext/kenlm/lm/CMakeLists.txt
deleted file mode 100644
index e3ef06f..0000000
--- a/ext/kenlm/lm/CMakeLists.txt
+++ /dev/null
@@ -1,90 +0,0 @@
-cmake_minimum_required(VERSION 2.8.8)
-#
-# The KenLM cmake files make use of add_library(... OBJECT ...)
-#
-# This syntax allows grouping of source files when compiling
-# (effectively creating "fake" libraries based on source subdirs).
-#
-# This syntax was only added in cmake version 2.8.8
-#
-# see http://www.cmake.org/Wiki/CMake/Tutorials/Object_Library
-
-
-# This CMake file was created by Lane Schwartz <do...@gmail.com>
-
-
-# Cache variable so the maximum n-gram order can be changed at configure
-# time; it is passed to the compiler as the KENLM_MAX_ORDER macro.
-set(KENLM_MAX_ORDER 6 CACHE STRING "Maximum supported ngram order")
-
-add_definitions(-DKENLM_MAX_ORDER=${KENLM_MAX_ORDER})
-
-
-# Explicitly list the source files for this subdirectory
-#
-# If you add any source files to this subdirectory
-#    that should be included in the kenlm library,
-#        (this excludes any unit test files)
-#    you should add them to the following list:
-set(KENLM_SOURCE 
-	bhiksha.cc
-	binary_format.cc
-	config.cc
-	lm_exception.cc
-	model.cc
-	quantize.cc
-	read_arpa.cc
-	search_hashed.cc
-	search_trie.cc
-	sizes.cc
-	trie.cc
-	trie_sort.cc
-	value_build.cc
-	virtual_interface.cc
-	vocab.cc
-)
-
-
-# Group these objects together for later use.
-#
-# Given add_library(foo OBJECT ${my_foo_sources}),
-# refer to these objects as $<TARGET_OBJECTS:foo>
-#
-add_library(kenlm OBJECT ${KENLM_SOURCE})
-
-# This directory has children that need to be processed
-add_subdirectory(builder)
-add_subdirectory(common)
-add_subdirectory(filter)
-
-
-
-# Explicitly list the executable files to be compiled
-set(EXE_LIST
-  query
-  fragment
-  build_binary
-)
-
-# AddExes is a project helper defined outside this file; each executable is
-# given the kenlm and kenlm_util object files plus Boost and pthread.
-AddExes(EXES ${EXE_LIST}
-        DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>
-        LIBRARIES ${Boost_LIBRARIES} pthread)
-
-# Conditionally build the interpolation code
-if(BUILD_INTERPOLATE)
-  add_subdirectory(interpolate)
-endif()
-
-if(BUILD_TESTING)
-
-  # Tests that take test.arpa as their only argument.
-  set(KENLM_BOOST_TESTS_LIST left_test partial_test)
-  AddTests(TESTS ${KENLM_BOOST_TESTS_LIST}
-           DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>
-           LIBRARIES ${Boost_LIBRARIES} pthread
-           TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa)
-
-  # model_test requires an extra command line parameter
-  KenLMAddTest(TEST model_test
-               DEPENDS $<TARGET_OBJECTS:kenlm> $<TARGET_OBJECTS:kenlm_util>
-               LIBRARIES ${Boost_LIBRARIES} pthread
-               TEST_ARGS ${CMAKE_CURRENT_SOURCE_DIR}/test.arpa
-                         ${CMAKE_CURRENT_SOURCE_DIR}/test_nounk.arpa)
-endif()

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/Jamfile
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/lm/Jamfile b/ext/kenlm/lm/Jamfile
deleted file mode 100644
index a479e2d..0000000
--- a/ext/kenlm/lm/Jamfile
+++ /dev/null
@@ -1,40 +0,0 @@
-# If you need higher order, change this option
-# Having this limit means that State can be
-# (KENLM_MAX_ORDER - 1) * sizeof(float) bytes instead of
-# sizeof(float*) + (KENLM_MAX_ORDER - 1) * sizeof(float) + malloc overhead
-max-order = [ option.get "max-kenlm-order" : 6 : 6 ] ;
-if ( $(max-order) != 6 ) {
-   echo "Setting KenLM maximum n-gram order to $(max-order)" ;
-}
-# From here on max-order holds build properties rather than the bare number.
-max-order = <define>KENLM_MAX_ORDER=$(max-order) ;
-
-# Record the setting in bin/order.log and make every user of max-order depend
-# on that file, so that changing the order forces recompilation.
-path-constant ORDER-LOG : bin/order.log ;
-update-if-changed $(ORDER-LOG) $(max-order) ;
-
-max-order += <dependency>$(ORDER-LOG) ;
-
-# Optional NPLM wrapper, enabled with --with-nplm=<path to nplm>.
-wrappers = ;
-local with-nplm = [ option.get "with-nplm" ] ;
-if $(with-nplm) {
-  lib nplm : : <search>$(with-nplm)/src ;
-  obj nplm.o : wrappers/nplm.cc : <include>.. <include>$(with-nplm)/src <cxxflags>-fopenmp <include>$(with-nplm)/3rdparty/eigen <define>NPLM_DOUBLE_PRECISION=0 ;
-  alias nplm-all : nplm.o nplm ..//boost_thread : : : <cxxflags>-fopenmp <linkflags>-fopenmp <define>WITH_NPLM <library>..//boost_thread ;
-  wrappers += nplm-all ;
-}
-
-# The kenlm "library": every .cc file here except *main.cc and *test.cc.
-fakelib kenlm : $(wrappers) [ glob *.cc : *main.cc *test.cc ] ../util//kenutil : <include>.. $(max-order) : : <include>.. $(max-order) ;
-
-import testing ;
-
-# Unit tests; the .arpa files are passed to the test programs as input files.
-run left_test.cc kenlm /top//boost_unit_test_framework : : test.arpa ;
-run model_test.cc kenlm /top//boost_unit_test_framework : : test.arpa test_nounk.arpa ;
-run partial_test.cc kenlm /top//boost_unit_test_framework : : test.arpa ;
-
-# One executable per *_main.cc file, named after the part before "_main.cc".
-exes = ;
-for local p in [ glob *_main.cc ] {
-  local name = [ MATCH "(.*)\_main.cc" : $(p) ] ;
-  exe $(name) : $(p) kenlm ;
-  exes += $(name) ;
-}
-
-# builder//lmplz is only included when building with <threading>multi.
-alias programs : $(exes) filter//filter builder//dump_counts : <threading>multi:<source>builder//lmplz ;

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/bhiksha.cc
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/lm/bhiksha.cc b/ext/kenlm/lm/bhiksha.cc
deleted file mode 100644
index 4262b61..0000000
--- a/ext/kenlm/lm/bhiksha.cc
+++ /dev/null
@@ -1,94 +0,0 @@
-#include "lm/bhiksha.hh"
-
-#include "lm/binary_format.hh"
-#include "lm/config.hh"
-#include "util/file.hh"
-#include "util/exception.hh"
-
-#include <limits>
-
-namespace lm {
-namespace ngram {
-namespace trie {
-
-// Only max_next is used: it determines the bit width and mask (next_) for the
-// inline next pointers.  The other arguments are accepted and ignored.
-DontBhiksha::DontBhiksha(const void * /*base*/, uint64_t /*max_offset*/, uint64_t max_next, const Config &/*config*/) :
-  next_(util::BitsMask::ByMax(max_next)) {}
-
-// Version byte of the sorted array compression format.  FinishedLoading writes
-// it as the first header byte and UpdateConfigFromBinary rejects files whose
-// first header byte differs from it.
-const uint8_t kArrayBhikshaVersion = 0;
-
-// TODO: put this in binary file header instead when I change the binary file format again.
-// Read the two header bytes stored at `offset` in `file`: byte 0 is the format
-// version, byte 1 the configured bit count.  Throws FormatLoadException when
-// the version is not kArrayBhikshaVersion; otherwise stores the bit count in
-// config.pointer_bhiksha_bits.
-void ArrayBhiksha::UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config) {
-  uint8_t header[2];
-  file.ReadForConfig(header, 2, offset);
-  const uint8_t found_version = header[0];
-  if (found_version != kArrayBhikshaVersion) {
-    UTIL_THROW(FormatLoadException, "This file has sorted array compression version " << (unsigned) found_version << " but the code expects version " << (unsigned)kArrayBhikshaVersion);
-  }
-  config.pointer_bhiksha_bits = header[1];
-}
-
-namespace {
-
-// Pick the number of bits to chop.  Every candidate from 0 up to
-// min(RequiredBits(max_next), config.pointer_bhiksha_bits) is scored as
-// (table cost in bits) - (savings in bits); the candidate with the lowest
-// score is returned, and on ties the smallest such candidate wins.
-uint8_t ChopBits(uint64_t max_offset, uint64_t max_next, const Config &config) {
-  const uint8_t required = util::RequiredBits(max_next);
-  const uint8_t limit = std::min(required, config.pointer_bhiksha_bits);
-  uint8_t winner = 0;
-  int64_t winner_cost = std::numeric_limits<int64_t>::max();
-  // Exhaustive search: this only runs once per order at construction time.
-  for (uint8_t candidate = 0; candidate <= limit; ++candidate) {
-    const uint64_t table_bits = (max_next >> (required - candidate)) * 64; /* table cost in bits */
-    const uint64_t saved_bits = max_offset * static_cast<int64_t>(candidate); /* savings in bits */
-    const int64_t cost = table_bits - saved_bits;
-    if (cost < winner_cost) {
-      winner_cost = cost;
-      winner = candidate;
-    }
-  }
-  return winner;
-}
-
-// Number of entries in the offset table: one per value of max_next's chopped
-// high bits, including the value 0.
-std::size_t ArrayCount(uint64_t max_offset, uint64_t max_next, const Config &config) {
-  // Bits that stay inline, i.e. the amount by which max_next is shifted down.
-  const uint8_t kept_bits = util::RequiredBits(max_next) - ChopBits(max_offset, max_next, config);
-  const uint64_t highest_entry = max_next >> kept_bits;
-  return highest_entry + 1 /* we store 0 too */;
-}
-} // namespace
-
-// Bytes to reserve for the offset table: one uint64_t of header plus
-// ArrayCount(...) uint64_t entries, plus 7 spare bytes so the start can be
-// rounded up to an 8-byte boundary.
-uint64_t ArrayBhiksha::Size(uint64_t max_offset, uint64_t max_next, const Config &config) {
-  const std::size_t words = 1 /* header */ + ArrayCount(max_offset, max_next, config);
-  return sizeof(uint64_t) * words + 7 /* 8-byte alignment */;
-}
-
-// Bits of each next pointer that remain inline: the bits needed for max_next
-// minus the bits chosen by ChopBits.
-uint8_t ArrayBhiksha::InlineBits(uint64_t max_offset, uint64_t max_next, const Config &config) {
-  const uint8_t required = util::RequiredBits(max_next);
-  const uint8_t chopped = ChopBits(max_offset, max_next, config);
-  return required - chopped;
-}
-
-namespace {
-
-// Round `from` up to the next address that is a multiple of 8; an address
-// that is already a multiple of 8 is returned unchanged.
-void *AlignTo8(void *from) {
-  uint8_t *const bytes = reinterpret_cast<uint8_t*>(from);
-  const std::size_t misalignment = reinterpret_cast<std::size_t>(bytes) & 7;
-  return misalignment ? bytes + 8 - misalignment : bytes;
-}
-
-} // namespace
-
-// Lay out the offset table inside the memory at `base`: round base up to an
-// 8-byte boundary, skip one uint64_t of header, and place
-// ArrayCount(max_offset, max_next, config) entries after it.  write_to_ starts
-// at the second entry because the first one is set to 0 by FinishedLoading.
-// The unaligned base is kept so FinishedLoading can write the header bytes.
-ArrayBhiksha::ArrayBhiksha(void *base, uint64_t max_offset, uint64_t max_next, const Config &config)
-  : next_inline_(util::BitsMask::ByBits(InlineBits(max_offset, max_next, config))),
-    offset_begin_(reinterpret_cast<const uint64_t*>(AlignTo8(base)) + 1 /* 8-byte header */),
-    offset_end_(offset_begin_ + ArrayCount(max_offset, max_next, config)),
-    write_to_(reinterpret_cast<uint64_t*>(AlignTo8(base)) + 1 /* 8-byte header */ + 1 /* first entry is 0 */),
-    original_base_(base) {}
-
-// Finish the table after all WriteNext calls: set the first entry to 0,
-// check that every entry was written (throws util::Exception otherwise), and
-// store the version byte and config.pointer_bhiksha_bits in the first two
-// bytes at original_base_.
-void ArrayBhiksha::FinishedLoading(const Config &config) {
-  // offset_begin_ points to const, so reach the same slot through the
-  // mutable write_to_ pointer instead of using a const_cast.
-  uint64_t *const first_entry = write_to_ - (write_to_ - offset_begin_);
-  *first_entry = 0;
-
-  if (write_to_ != offset_end_) UTIL_THROW(util::Exception, "Did not get all the array entries that were expected.");
-
-  uint8_t *const header = reinterpret_cast<uint8_t*>(original_base_);
-  header[0] = kArrayBhikshaVersion;
-  header[1] = config.pointer_bhiksha_bits;
-}
-
-} // namespace trie
-} // namespace ngram
-} // namespace lm

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/6da3961b/ext/kenlm/lm/bhiksha.hh
----------------------------------------------------------------------
diff --git a/ext/kenlm b/ext/kenlm
new file mode 160000
index 0000000..56fdb5c
--- /dev/null
+++ b/ext/kenlm
@@ -0,0 +1 @@
+Subproject commit 56fdb5c44fca34d5a2e07d96139c28fb163983c5
diff --git a/ext/kenlm/lm/bhiksha.hh b/ext/kenlm/lm/bhiksha.hh
deleted file mode 100644
index 36438f1..0000000
--- a/ext/kenlm/lm/bhiksha.hh
+++ /dev/null
@@ -1,122 +0,0 @@
-/* Simple implementation of
- * @inproceedings{bhikshacompression,
- *  author={Bhiksha Raj and Ed Whittaker},
- *  year={2003},
- *  title={Lossless Compression of Language Model Structure and Word Identifiers},
- *  booktitle={Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing},
- *  pages={388--391},
- *  }
- *
- *  Currently only used for next pointers.
- */
-
-#ifndef LM_BHIKSHA_H
-#define LM_BHIKSHA_H
-
-#include "lm/model_type.hh"
-#include "lm/trie.hh"
-#include "util/bit_packing.hh"
-#include "util/sorted_uniform.hh"
-
-#include <algorithm>
-#include <stdint.h>
-#include <cassert>
-
-namespace lm {
-namespace ngram {
-struct Config;
-class BinaryFormat;
-
-namespace trie {
-
-// Policy that stores next pointers inline at full width
-// (util::RequiredBits(max_next) bits) and keeps no side table.  It offers
-// the same member functions as ArrayBhiksha.
-class DontBhiksha {
-  public:
-    static const ModelType kModelTypeAdd = static_cast<ModelType>(0);
-
-    // Nothing is read from the file and config is left untouched.
-    static void UpdateConfigFromBinary(const BinaryFormat &, uint64_t, Config &/*config*/) {}
-
-    // No additional memory is required.
-    static uint64_t Size(uint64_t /*max_offset*/, uint64_t /*max_next*/, const Config &/*config*/) {
-      return 0;
-    }
-
-    // Bits per inline next pointer for values up to max_next.
-    static uint8_t InlineBits(uint64_t /*max_offset*/, uint64_t max_next, const Config &/*config*/) {
-      const uint8_t width = util::RequiredBits(max_next);
-      return width;
-    }
-
-    DontBhiksha(const void *base, uint64_t max_offset, uint64_t max_next, const Config &config);
-
-    // Read the pointer at bit_offset into out.begin and the pointer
-    // total_bits later into out.end.
-    void ReadNext(const void *base, uint64_t bit_offset, uint64_t /*index*/, uint8_t total_bits, NodeRange &out) const {
-      const uint64_t following_offset = bit_offset + total_bits;
-      out.begin = util::ReadInt57(base, bit_offset, next_.bits, next_.mask);
-      out.end = util::ReadInt57(base, following_offset, next_.bits, next_.mask);
-      // Disabled check: assert(out.end >= out.begin);
-    }
-
-    // Store value at bit_offset using next_.bits bits.
-    void WriteNext(void *base, uint64_t bit_offset, uint64_t /*index*/, uint64_t value) {
-      util::WriteInt57(base, bit_offset, next_.bits, value);
-    }
-
-    // Nothing to finalize.
-    void FinishedLoading(const Config &/*config*/) {}
-
-    uint8_t InlineBits() const { return next_.bits; }
-
-  private:
-    // Bit width and mask of an inline next pointer.
-    util::BitsMask next_;
-};
-
-// Policy that keeps only the low next_inline_.bits bits of each next pointer
-// inline.  The remaining high bits are recovered from a sorted offset table:
-// entry k holds the first record index whose high bits are at least k.
-class ArrayBhiksha {
-  public:
-    static const ModelType kModelTypeAdd = kArrayAdd;
-
-    // Read the stored header at offset and update config from it.
-    static void UpdateConfigFromBinary(const BinaryFormat &file, uint64_t offset, Config &config);
-
-    // Bytes of memory the offset table needs.
-    static uint64_t Size(uint64_t max_offset, uint64_t max_next, const Config &config);
-
-    // Bits of each next pointer that are stored inline.
-    static uint8_t InlineBits(uint64_t max_offset, uint64_t max_next, const Config &config);
-
-    // base must point to at least Size(max_offset, max_next, config) bytes.
-    ArrayBhiksha(void *base, uint64_t max_offset, uint64_t max_next, const Config &config);
-
-    // Reconstruct the [begin, end) range for record `index`: the inline bits
-    // are read at bit_offset (begin) and bit_offset + total_bits (end); the
-    // high bits are the positions found for index and index + 1 in the table.
-    void ReadNext(const void *base, uint64_t bit_offset, uint64_t index, uint8_t total_bits, NodeRange &out) const {
-      // Some assertions are commented out because they are expensive.
-      // assert(*offset_begin_ == 0);
-      // std::upper_bound returns the first element that is greater.  Want the
-      // last element that is <= to the index.
-      const uint64_t *begin_it = std::upper_bound(offset_begin_, offset_end_, index) - 1;
-      // Since *offset_begin_ == 0, the position should be in range.
-      // assert(begin_it >= offset_begin_);
-      const uint64_t *end_it;
-      // Linear scan forward from begin_it for the last element <= index + 1.
-      for (end_it = begin_it + 1; (end_it < offset_end_) && (*end_it <= index + 1); ++end_it) {}
-      // assert(end_it == std::upper_bound(offset_begin_, offset_end_, index + 1));
-      --end_it;
-      // assert(end_it >= begin_it);
-      out.begin = ((begin_it - offset_begin_) << next_inline_.bits) |
-        util::ReadInt57(base, bit_offset, next_inline_.bits, next_inline_.mask);
-      out.end = ((end_it - offset_begin_) << next_inline_.bits) |
-        util::ReadInt57(base, bit_offset + total_bits, next_inline_.bits, next_inline_.mask);
-      // If this fails, consider rebuilding your model using KenLM after 1e333d786b748555e8f368d2bbba29a016c98052
-      assert(out.end >= out.begin);
-    }
-
-    // Store `value` for record `index`: table entries up to and including
-    // the one for value's high bits that have not been written yet are set
-    // to index, then the low bits are written inline at bit_offset.
-    void WriteNext(void *base, uint64_t bit_offset, uint64_t index, uint64_t value) {
-      uint64_t encode = value >> next_inline_.bits;
-      for (; write_to_ <= offset_begin_ + encode; ++write_to_) *write_to_ = index;
-      util::WriteInt57(base, bit_offset, next_inline_.bits, value & next_inline_.mask);
-    }
-
-    // Call once after the last WriteNext to complete the table and header.
-    void FinishedLoading(const Config &config);
-
-    uint8_t InlineBits() const { return next_inline_.bits; }
-
-  private:
-    // Bit width and mask of the inline part of a next pointer.
-    const util::BitsMask next_inline_;
-
-    // Bounds of the offset table.
-    const uint64_t *const offset_begin_;
-    const uint64_t *const offset_end_;
-
-    // Next table entry to be filled in by WriteNext.
-    uint64_t *write_to_;
-
-    // Unaligned start of the memory block, where the header bytes are written.
-    void *original_base_;
-};
-
-} // namespace trie
-} // namespace ngram
-} // namespace lm
-
-#endif // LM_BHIKSHA_H