You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@camel.apache.org by da...@apache.org on 2014/03/12 14:18:35 UTC
[02/10] [CAMEL-7249] Working blueprint tests
http://git-wip-us.apache.org/repos/asf/camel/blob/cd101646/tests/camel-itest-osgi/src/test/resources/hdfs-default.xml
----------------------------------------------------------------------
diff --git a/tests/camel-itest-osgi/src/test/resources/hdfs-default.xml b/tests/camel-itest-osgi/src/test/resources/hdfs-default.xml
new file mode 100644
index 0000000..24f0b03
--- /dev/null
+++ b/tests/camel-itest-osgi/src/test/resources/hdfs-default.xml
@@ -0,0 +1,1607 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Do not modify this file directly. Instead, copy entries that you -->
+<!-- wish to modify from this file into hdfs-site.xml and change them -->
+<!-- there. If hdfs-site.xml does not already exist, create it. -->
+
+<configuration>
+
+<property>
+ <name>hadoop.hdfs.configuration.version</name>
+ <value>1</value>
+ <description>version of this configuration file</description>
+</property>
+
+<property>
+ <name>dfs.namenode.logging.level</name>
+ <value>info</value>
+ <description>
+ The logging level for dfs namenode. Other values are "dir" (trace
+ namespace mutations), "block" (trace block under/over replications
+ and block creations/deletions), or "all".
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.rpc-address</name>
+ <value></value>
+ <description>
+ RPC address that handles all clients requests. In the case of HA/Federation where multiple namenodes exist,
+ the name service id is added to the name e.g. dfs.namenode.rpc-address.ns1
+ dfs.namenode.rpc-address.EXAMPLENAMESERVICE
+ The value of this property will take the form of nn-host1:rpc-port.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.rpc-bind-host</name>
+ <value></value>
+ <description>
+ The actual address the server will bind to. If this optional address is
+ set, the RPC server will bind to this address and the port specified in
+ dfs.namenode.rpc-address for the RPC server. It can also be specified
+ per name node or name service for HA/Federation. This is most useful for
+ making name node listen to all interfaces by setting to 0.0.0.0.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.servicerpc-address</name>
+ <value></value>
+ <description>
+ RPC address for HDFS Services communication. BackupNode, Datanodes and all other services should be
+ connecting to this address if it is configured. In the case of HA/Federation where multiple namenodes exist,
+ the name service id is added to the name e.g. dfs.namenode.servicerpc-address.ns1
+ dfs.namenode.rpc-address.EXAMPLENAMESERVICE
+ The value of this property will take the form of nn-host1:rpc-port.
+ If the value of this property is unset the value of dfs.namenode.rpc-address will be used as the default.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.servicerpc-bind-host</name>
+ <value></value>
+ <description>
+ The actual address the server will bind to. If this optional address is
+ set, the service RPC server will bind to this address and the port
+ specified in dfs.namenode.servicerpc-address. It can also be specified
+ per name node or name service for HA/Federation. This is most useful for
+ making name node listen to all interfaces by setting to 0.0.0.0.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.secondary.http-address</name>
+ <value>0.0.0.0:50090</value>
+ <description>
+ The secondary namenode http server address and port.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.address</name>
+ <value>0.0.0.0:50010</value>
+ <description>
+ The datanode server address and port for data transfer.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.http.address</name>
+ <value>0.0.0.0:50075</value>
+ <description>
+ The datanode http server address and port.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.ipc.address</name>
+ <value>0.0.0.0:50020</value>
+ <description>
+ The datanode ipc server address and port.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.handler.count</name>
+ <value>10</value>
+ <description>The number of server threads for the datanode.</description>
+</property>
+
+<property>
+ <name>dfs.namenode.http-address</name>
+ <value>0.0.0.0:50070</value>
+ <description>
+ The address and the base port where the dfs namenode web ui will listen on.
+ </description>
+</property>
+
+<property>
+ <name>dfs.https.enable</name>
+ <value>false</value>
+ <description>
+ Deprecated. Use "dfs.http.policy" instead.
+ </description>
+</property>
+
+<property>
+ <name>dfs.http.policy</name>
+ <value>HTTP_ONLY</value>
+ <description>Decide if HTTPS(SSL) is supported on HDFS
+ This configures the HTTP endpoint for HDFS daemons:
+ The following values are supported:
+ - HTTP_ONLY : Service is provided only on http
+ - HTTPS_ONLY : Service is provided only on https
+ - HTTP_AND_HTTPS : Service is provided both on http and https
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.https.need-auth</name>
+ <value>false</value>
+ <description>Whether SSL client certificate authentication is required
+ </description>
+</property>
+
+<property>
+ <name>dfs.https.server.keystore.resource</name>
+ <value>ssl-server.xml</value>
+ <description>Resource file from which ssl server keystore
+ information will be extracted
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.https.keystore.resource</name>
+ <value>ssl-client.xml</value>
+ <description>Resource file from which ssl client keystore
+ information will be extracted
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.https.address</name>
+ <value>0.0.0.0:50475</value>
+ <description>The datanode secure http server address and port.</description>
+</property>
+
+<property>
+ <name>dfs.namenode.https-address</name>
+ <value>0.0.0.0:50470</value>
+ <description>The namenode secure http server address and port.</description>
+</property>
+
+ <property>
+ <name>dfs.datanode.dns.interface</name>
+ <value>default</value>
+ <description>The name of the Network Interface from which a data node should
+ report its IP address.
+ </description>
+ </property>
+
+<property>
+ <name>dfs.datanode.dns.nameserver</name>
+ <value>default</value>
+ <description>The host name or IP address of the name server (DNS)
+ which a DataNode should use to determine the host name used by the
+ NameNode for communication and display purposes.
+ </description>
+ </property>
+
+ <property>
+ <name>dfs.namenode.backup.address</name>
+ <value>0.0.0.0:50100</value>
+ <description>
+ The backup node server address and port.
+ If the port is 0 then the server will start on a free port.
+ </description>
+</property>
+
+ <property>
+ <name>dfs.namenode.backup.http-address</name>
+ <value>0.0.0.0:50105</value>
+ <description>
+ The backup node http server address and port.
+ If the port is 0 then the server will start on a free port.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.replication.considerLoad</name>
+ <value>true</value>
+ <description>Decide if chooseTarget considers the target's load or not
+ </description>
+</property>
+<property>
+ <name>dfs.default.chunk.view.size</name>
+ <value>32768</value>
+ <description>The number of bytes to view for a file on the browser.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.du.reserved</name>
+ <value>0</value>
+ <description>Reserved space in bytes per volume. Always leave this much space free for non dfs use.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.name.dir</name>
+ <value>file://${hadoop.tmp.dir}/dfs/name</value>
+ <description>Determines where on the local filesystem the DFS name node
+ should store the name table(fsimage). If this is a comma-delimited list
+ of directories then the name table is replicated in all of the
+ directories, for redundancy. </description>
+</property>
+
+<property>
+ <name>dfs.namenode.name.dir.restore</name>
+ <value>false</value>
+ <description>Set to true to enable NameNode to attempt recovering a
+ previously failed dfs.namenode.name.dir. When enabled, a recovery of any
+ failed directory is attempted during checkpoint.</description>
+</property>
+
+<property>
+ <name>dfs.namenode.fs-limits.max-component-length</name>
+ <value>0</value>
+ <description>Defines the maximum number of characters in each component
+ of a path. A value of 0 will disable the check.</description>
+</property>
+
+<property>
+ <name>dfs.namenode.fs-limits.max-directory-items</name>
+ <value>0</value>
+ <description>Defines the maximum number of items that a directory may
+ contain. A value of 0 will disable the check.</description>
+</property>
+
+<property>
+ <name>dfs.namenode.fs-limits.min-block-size</name>
+ <value>1048576</value>
+ <description>Minimum block size in bytes, enforced by the Namenode at create
+ time. This prevents the accidental creation of files with tiny block
+ sizes (and thus many blocks), which can degrade
+ performance.</description>
+</property>
+
+<property>
+ <name>dfs.namenode.fs-limits.max-blocks-per-file</name>
+ <value>1048576</value>
+ <description>Maximum number of blocks per file, enforced by the Namenode on
+ write. This prevents the creation of extremely large files which can
+ degrade performance.</description>
+</property>
+
+<property>
+ <name>dfs.namenode.edits.dir</name>
+ <value>${dfs.namenode.name.dir}</value>
+ <description>Determines where on the local filesystem the DFS name node
+ should store the transaction (edits) file. If this is a comma-delimited list
+ of directories then the transaction file is replicated in all of the
+ directories, for redundancy. Default value is same as dfs.namenode.name.dir
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.shared.edits.dir</name>
+ <value></value>
+ <description>A directory on shared storage between the multiple namenodes
+ in an HA cluster. This directory will be written by the active and read
+ by the standby in order to keep the namespaces synchronized. This directory
+ does not need to be listed in dfs.namenode.edits.dir above. It should be
+ left empty in a non-HA cluster.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.edits.journal-plugin.qjournal</name>
+ <value>org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager</value>
+</property>
+
+<property>
+ <name>dfs.permissions.enabled</name>
+ <value>true</value>
+ <description>
+ If "true", enable permission checking in HDFS.
+ If "false", permission checking is turned off,
+ but all other behavior is unchanged.
+ Switching from one parameter value to the other does not change the mode,
+ owner or group of files or directories.
+ </description>
+</property>
+
+<property>
+ <name>dfs.permissions.superusergroup</name>
+ <value>supergroup</value>
+ <description>The name of the group of super-users.</description>
+</property>
+<!--
+<property>
+ <name>dfs.cluster.administrators</name>
+ <value>ACL for the admins</value>
+ <description>This configuration is used to control who can access the
+ default servlets in the namenode, etc.
+ </description>
+</property>
+-->
+
+<property>
+ <name>dfs.block.access.token.enable</name>
+ <value>false</value>
+ <description>
+ If "true", access tokens are used as capabilities for accessing datanodes.
+ If "false", no access tokens are checked on accessing datanodes.
+ </description>
+</property>
+
+<property>
+ <name>dfs.block.access.key.update.interval</name>
+ <value>600</value>
+ <description>
+ Interval in minutes at which namenode updates its access keys.
+ </description>
+</property>
+
+<property>
+ <name>dfs.block.access.token.lifetime</name>
+ <value>600</value>
+ <description>The lifetime of access tokens in minutes.</description>
+</property>
+
+<property>
+ <name>dfs.datanode.data.dir</name>
+ <value>file://${hadoop.tmp.dir}/dfs/data</value>
+  <description>Determines where on the local filesystem a DFS data node
+ should store its blocks. If this is a comma-delimited
+ list of directories, then data will be stored in all named
+ directories, typically on different devices.
+ Directories that do not exist are ignored.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.data.dir.perm</name>
+ <value>700</value>
+  <description>Permissions for the directories on the local filesystem where
+ the DFS data node store its blocks. The permissions can either be octal or
+ symbolic.</description>
+</property>
+
+<property>
+ <name>dfs.replication</name>
+ <value>3</value>
+ <description>Default block replication.
+ The actual number of replications can be specified when the file is created.
+ The default is used if replication is not specified in create time.
+ </description>
+</property>
+
+<property>
+ <name>dfs.replication.max</name>
+ <value>512</value>
+ <description>Maximal block replication.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.replication.min</name>
+ <value>1</value>
+ <description>Minimal block replication.
+ </description>
+</property>
+
+<property>
+ <name>dfs.blocksize</name>
+ <value>134217728</value>
+ <description>
+ The default block size for new files, in bytes.
+ You can use the following suffix (case insensitive):
+ k(kilo), m(mega), g(giga), t(tera), p(peta), e(exa) to specify the size (such as 128k, 512m, 1g, etc.),
+ Or provide complete size in bytes (such as 134217728 for 128 MB).
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.block.write.retries</name>
+ <value>3</value>
+ <description>The number of retries for writing blocks to the data nodes,
+ before we signal failure to the application.
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.block.write.replace-datanode-on-failure.enable</name>
+ <value>true</value>
+ <description>
+ If there is a datanode/network failure in the write pipeline,
+ DFSClient will try to remove the failed datanode from the pipeline
+ and then continue writing with the remaining datanodes. As a result,
+ the number of datanodes in the pipeline is decreased. The feature is
+ to add new datanodes to the pipeline.
+
+ This is a site-wide property to enable/disable the feature.
+
+ When the cluster size is extremely small, e.g. 3 nodes or less, cluster
+ administrators may want to set the policy to NEVER in the default
+ configuration file or disable this feature. Otherwise, users may
+ experience an unusually high rate of pipeline failures since it is
+ impossible to find new datanodes for replacement.
+
+ See also dfs.client.block.write.replace-datanode-on-failure.policy
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.block.write.replace-datanode-on-failure.policy</name>
+ <value>DEFAULT</value>
+ <description>
+ This property is used only if the value of
+ dfs.client.block.write.replace-datanode-on-failure.enable is true.
+
+ ALWAYS: always add a new datanode when an existing datanode is removed.
+
+ NEVER: never add a new datanode.
+
+ DEFAULT:
+ Let r be the replication number.
+ Let n be the number of existing datanodes.
+ Add a new datanode only if r is greater than or equal to 3 and either
+ (1) floor(r/2) is greater than or equal to n; or
+ (2) r is greater than n and the block is hflushed/appended.
+ </description>
+</property>
+
+<property>
+ <name>dfs.blockreport.intervalMsec</name>
+ <value>21600000</value>
+ <description>Determines block reporting interval in milliseconds.</description>
+</property>
+
+<property>
+ <name>dfs.blockreport.initialDelay</name> <value>0</value>
+ <description>Delay for first block report in seconds.</description>
+</property>
+
+<property>
+ <name>dfs.datanode.directoryscan.interval</name>
+ <value>21600</value>
+ <description>Interval in seconds for Datanode to scan data directories and
+ reconcile the difference between blocks in memory and on the disk.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.directoryscan.threads</name>
+ <value>1</value>
+ <description>How many threads should the threadpool used to compile reports
+ for volumes in parallel have.
+ </description>
+</property>
+
+<property>
+ <name>dfs.heartbeat.interval</name>
+ <value>3</value>
+ <description>Determines datanode heartbeat interval in seconds.</description>
+</property>
+
+<property>
+ <name>dfs.namenode.handler.count</name>
+ <value>10</value>
+ <description>The number of server threads for the namenode.</description>
+</property>
+
+<property>
+ <name>dfs.namenode.safemode.threshold-pct</name>
+ <value>0.999f</value>
+ <description>
+ Specifies the percentage of blocks that should satisfy
+ the minimal replication requirement defined by dfs.namenode.replication.min.
+ Values less than or equal to 0 mean not to wait for any particular
+ percentage of blocks before exiting safemode.
+ Values greater than 1 will make safe mode permanent.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.safemode.min.datanodes</name>
+ <value>0</value>
+ <description>
+ Specifies the number of datanodes that must be considered alive
+ before the name node exits safemode.
+ Values less than or equal to 0 mean not to take the number of live
+ datanodes into account when deciding whether to remain in safe mode
+ during startup.
+ Values greater than the number of datanodes in the cluster
+ will make safe mode permanent.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.safemode.extension</name>
+ <value>30000</value>
+ <description>
+ Determines extension of safe mode in milliseconds
+ after the threshold level is reached.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.balance.bandwidthPerSec</name>
+ <value>1048576</value>
+ <description>
+ Specifies the maximum amount of bandwidth that each datanode
+ can utilize for the balancing purpose in term of
+ the number of bytes per second.
+ </description>
+</property>
+
+<property>
+ <name>dfs.hosts</name>
+ <value></value>
+ <description>Names a file that contains a list of hosts that are
+ permitted to connect to the namenode. The full pathname of the file
+ must be specified. If the value is empty, all hosts are
+ permitted.</description>
+</property>
+
+<property>
+ <name>dfs.hosts.exclude</name>
+ <value></value>
+ <description>Names a file that contains a list of hosts that are
+ not permitted to connect to the namenode. The full pathname of the
+ file must be specified. If the value is empty, no hosts are
+ excluded.</description>
+</property>
+
+<property>
+ <name>dfs.namenode.max.objects</name>
+ <value>0</value>
+ <description>The maximum number of files, directories and blocks
+ dfs supports. A value of zero indicates no limit to the number
+ of objects that dfs supports.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
+ <value>true</value>
+ <description>
+ If true (the default), then the namenode requires that a connecting
+ datanode's address must be resolved to a hostname. If necessary, a reverse
+ DNS lookup is performed. All attempts to register a datanode from an
+ unresolvable address are rejected.
+
+ It is recommended that this setting be left on to prevent accidental
+ registration of datanodes listed by hostname in the excludes file during a
+ DNS outage. Only set this to false in environments where there is no
+ infrastructure to support reverse DNS lookup.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.decommission.interval</name>
+ <value>30</value>
+ <description>Namenode periodicity in seconds to check if decommission is
+ complete.</description>
+</property>
+
+<property>
+ <name>dfs.namenode.decommission.nodes.per.interval</name>
+ <value>5</value>
+ <description>The number of nodes namenode checks if decommission is complete
+ in each dfs.namenode.decommission.interval.</description>
+</property>
+
+<property>
+ <name>dfs.namenode.replication.interval</name>
+ <value>3</value>
+ <description>The periodicity in seconds with which the namenode computes
+  replication work for datanodes. </description>
+</property>
+
+<property>
+ <name>dfs.namenode.accesstime.precision</name>
+ <value>3600000</value>
+  <description>The access time for HDFS file is precise up to this value.
+ The default value is 1 hour. Setting a value of 0 disables
+ access times for HDFS.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.plugins</name>
+ <value></value>
+ <description>Comma-separated list of datanode plug-ins to be activated.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.plugins</name>
+ <value></value>
+ <description>Comma-separated list of namenode plug-ins to be activated.
+ </description>
+</property>
+
+<property>
+ <name>dfs.stream-buffer-size</name>
+ <value>4096</value>
+ <description>The size of buffer to stream files.
+ The size of this buffer should probably be a multiple of hardware
+ page size (4096 on Intel x86), and it determines how much data is
+ buffered during read and write operations.</description>
+</property>
+
+<property>
+ <name>dfs.bytes-per-checksum</name>
+ <value>512</value>
+ <description>The number of bytes per checksum. Must not be larger than
+ dfs.stream-buffer-size</description>
+</property>
+
+<property>
+ <name>dfs.client-write-packet-size</name>
+ <value>65536</value>
+ <description>Packet size for clients to write</description>
+</property>
+
+<property>
+ <name>dfs.client.write.exclude.nodes.cache.expiry.interval.millis</name>
+ <value>600000</value>
+ <description>The maximum period to keep a DN in the excluded nodes list
+ at a client. After this period, in milliseconds, the previously excluded node(s) will
+ be removed automatically from the cache and will be considered good for block allocations
+ again. Useful to lower or raise in situations where you keep a file open for very long
+ periods (such as a Write-Ahead-Log (WAL) file) to make the writer tolerant to cluster maintenance
+ restarts. Defaults to 10 minutes.</description>
+</property>
+
+<property>
+ <name>dfs.namenode.checkpoint.dir</name>
+ <value>file://${hadoop.tmp.dir}/dfs/namesecondary</value>
+ <description>Determines where on the local filesystem the DFS secondary
+ name node should store the temporary images to merge.
+ If this is a comma-delimited list of directories then the image is
+ replicated in all of the directories for redundancy.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.checkpoint.edits.dir</name>
+ <value>${dfs.namenode.checkpoint.dir}</value>
+ <description>Determines where on the local filesystem the DFS secondary
+ name node should store the temporary edits to merge.
+  If this is a comma-delimited list of directories then the edits are
+  replicated in all of the directories for redundancy.
+ Default value is same as dfs.namenode.checkpoint.dir
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.checkpoint.period</name>
+ <value>3600</value>
+ <description>The number of seconds between two periodic checkpoints.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.checkpoint.txns</name>
+ <value>1000000</value>
+ <description>The Secondary NameNode or CheckpointNode will create a checkpoint
+ of the namespace every 'dfs.namenode.checkpoint.txns' transactions, regardless
+ of whether 'dfs.namenode.checkpoint.period' has expired.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.checkpoint.check.period</name>
+ <value>60</value>
+ <description>The SecondaryNameNode and CheckpointNode will poll the NameNode
+ every 'dfs.namenode.checkpoint.check.period' seconds to query the number
+ of uncheckpointed transactions.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.checkpoint.max-retries</name>
+ <value>3</value>
+ <description>The SecondaryNameNode retries failed checkpointing. If the
+ failure occurs while loading fsimage or replaying edits, the number of
+ retries is limited by this variable.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.num.checkpoints.retained</name>
+ <value>2</value>
+ <description>The number of image checkpoint files that will be retained by
+ the NameNode and Secondary NameNode in their storage directories. All edit
+ logs necessary to recover an up-to-date namespace from the oldest retained
+ checkpoint will also be retained.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.num.extra.edits.retained</name>
+ <value>1000000</value>
+ <description>The number of extra transactions which should be retained
+ beyond what is minimally necessary for a NN restart. This can be useful for
+ audit purposes or for an HA setup where a remote Standby Node may have
+ been offline for some time and need to have a longer backlog of retained
+ edits in order to start again.
+ Typically each edit is on the order of a few hundred bytes, so the default
+ of 1 million edits should be on the order of hundreds of MBs or low GBs.
+
+ NOTE: Fewer extra edits may be retained than value specified for this setting
+ if doing so would mean that more segments would be retained than the number
+ configured by dfs.namenode.max.extra.edits.segments.retained.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.max.extra.edits.segments.retained</name>
+ <value>10000</value>
+ <description>The maximum number of extra edit log segments which should be retained
+ beyond what is minimally necessary for a NN restart. When used in conjunction with
+ dfs.namenode.num.extra.edits.retained, this configuration property serves to cap
+ the number of extra edits files to a reasonable value.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.delegation.key.update-interval</name>
+ <value>86400000</value>
+ <description>The update interval for master key for delegation tokens
+ in the namenode in milliseconds.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.delegation.token.max-lifetime</name>
+ <value>604800000</value>
+ <description>The maximum lifetime in milliseconds for which a delegation
+ token is valid.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.delegation.token.renew-interval</name>
+ <value>86400000</value>
+ <description>The renewal interval for delegation token in milliseconds.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.failed.volumes.tolerated</name>
+ <value>0</value>
+ <description>The number of volumes that are allowed to
+ fail before a datanode stops offering service. By default
+ any volume failure will cause a datanode to shutdown.
+ </description>
+</property>
+
+<property>
+ <name>dfs.image.compress</name>
+ <value>false</value>
+ <description>Should the dfs image be compressed?
+ </description>
+</property>
+
+<property>
+ <name>dfs.image.compression.codec</name>
+ <value>org.apache.hadoop.io.compress.DefaultCodec</value>
+ <description>If the dfs image is compressed, how should they be compressed?
+ This has to be a codec defined in io.compression.codecs.
+ </description>
+</property>
+
+<property>
+ <name>dfs.image.transfer.timeout</name>
+ <value>600000</value>
+ <description>
+ Timeout for image transfer in milliseconds. This timeout and the related
+ dfs.image.transfer.bandwidthPerSec parameter should be configured such
+ that normal image transfer can complete within the timeout.
+ This timeout prevents client hangs when the sender fails during
+ image transfer, which is particularly important during checkpointing.
+ Note that this timeout applies to the entirety of image transfer, and
+ is not a socket timeout.
+ </description>
+</property>
+
+<property>
+ <name>dfs.image.transfer.bandwidthPerSec</name>
+ <value>0</value>
+ <description>
+ Maximum bandwidth used for image transfer in bytes per second.
+ This can help keep normal namenode operations responsive during
+ checkpointing. The maximum bandwidth and timeout in
+ dfs.image.transfer.timeout should be set such that normal image
+ transfers can complete successfully.
+ A default value of 0 indicates that throttling is disabled.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.support.allow.format</name>
+ <value>true</value>
+ <description>Does HDFS namenode allow itself to be formatted?
+ You may consider setting this to false for any production
+ cluster, to avoid any possibility of formatting a running DFS.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.max.transfer.threads</name>
+ <value>4096</value>
+ <description>
+ Specifies the maximum number of threads to use for transferring data
+ in and out of the DN.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.readahead.bytes</name>
+ <value>4193404</value>
+ <description>
+ While reading block files, if the Hadoop native libraries are available,
+ the datanode can use the posix_fadvise system call to explicitly
+ page data into the operating system buffer cache ahead of the current
+ reader's position. This can improve performance especially when
+ disks are highly contended.
+
+ This configuration specifies the number of bytes ahead of the current
+ read position which the datanode will attempt to read ahead. This
+ feature may be disabled by configuring this property to 0.
+
+ If the native libraries are not available, this configuration has no
+ effect.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.drop.cache.behind.reads</name>
+ <value>false</value>
+ <description>
+ In some workloads, the data read from HDFS is known to be significantly
+ large enough that it is unlikely to be useful to cache it in the
+ operating system buffer cache. In this case, the DataNode may be
+ configured to automatically purge all data from the buffer cache
+ after it is delivered to the client. This behavior is automatically
+ disabled for workloads which read only short sections of a block
+  (e.g. HBase random-IO workloads).
+
+ This may improve performance for some workloads by freeing buffer
+  cache space usage for more cacheable data.
+
+ If the Hadoop native libraries are not available, this configuration
+ has no effect.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.drop.cache.behind.writes</name>
+ <value>false</value>
+ <description>
+ In some workloads, the data written to HDFS is known to be significantly
+ large enough that it is unlikely to be useful to cache it in the
+ operating system buffer cache. In this case, the DataNode may be
+ configured to automatically purge all data from the buffer cache
+ after it is written to disk.
+
+ This may improve performance for some workloads by freeing buffer
+  cache space usage for more cacheable data.
+
+ If the Hadoop native libraries are not available, this configuration
+ has no effect.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.sync.behind.writes</name>
+ <value>false</value>
+ <description>
+ If this configuration is enabled, the datanode will instruct the
+ operating system to enqueue all written data to the disk immediately
+ after it is written. This differs from the usual OS policy which
+ may wait for up to 30 seconds before triggering writeback.
+
+ This may improve performance for some workloads by smoothing the
+ IO profile for data written to disk.
+
+ If the Hadoop native libraries are not available, this configuration
+ has no effect.
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.failover.max.attempts</name>
+ <value>15</value>
+ <description>
+ Expert only. The number of client failover attempts that should be
+ made before the failover is considered failed.
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.failover.sleep.base.millis</name>
+ <value>500</value>
+ <description>
+ Expert only. The time to wait, in milliseconds, between failover
+ attempts increases exponentially as a function of the number of
+ attempts made so far, with a random factor of +/- 50%. This option
+ specifies the base value used in the failover calculation. The
+ first failover will retry immediately. The 2nd failover attempt
+ will delay at least dfs.client.failover.sleep.base.millis
+ milliseconds. And so on.
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.failover.sleep.max.millis</name>
+ <value>15000</value>
+ <description>
+ Expert only. The time to wait, in milliseconds, between failover
+ attempts increases exponentially as a function of the number of
+ attempts made so far, with a random factor of +/- 50%. This option
+ specifies the maximum value to wait between failovers.
+ Specifically, the time between two failover attempts will not
+ exceed +/- 50% of dfs.client.failover.sleep.max.millis
+ milliseconds.
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.failover.connection.retries</name>
+ <value>0</value>
+ <description>
+ Expert only. Indicates the number of retries a failover IPC client
+ will make to establish a server connection.
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.failover.connection.retries.on.timeouts</name>
+ <value>0</value>
+ <description>
+ Expert only. The number of retry attempts a failover IPC client
+ will make on socket timeout when establishing a server connection.
+ </description>
+</property>
+
+<property>
+ <name>dfs.nameservices</name>
+ <value></value>
+ <description>
+ Comma-separated list of nameservices.
+ </description>
+</property>
+
+<property>
+ <name>dfs.nameservice.id</name>
+ <value></value>
+ <description>
+ The ID of this nameservice. If the nameservice ID is not
+ configured or more than one nameservice is configured for
+ dfs.nameservices it is determined automatically by
+ matching the local node's address with the configured address.
+ </description>
+</property>
+
+<property>
+ <name>dfs.ha.namenodes.EXAMPLENAMESERVICE</name>
+ <value></value>
+ <description>
+ The prefix for a given nameservice, contains a comma-separated
+ list of namenodes for a given nameservice (eg EXAMPLENAMESERVICE).
+ </description>
+</property>
+
+<property>
+ <name>dfs.ha.namenode.id</name>
+ <value></value>
+ <description>
+ The ID of this namenode. If the namenode ID is not configured it
+ is determined automatically by matching the local node's address
+ with the configured address.
+ </description>
+</property>
+
+<property>
+ <name>dfs.ha.log-roll.period</name>
+ <value>120</value>
+ <description>
+ How often, in seconds, the StandbyNode should ask the active to
+ roll edit logs. Since the StandbyNode only reads from finalized
+ log segments, the StandbyNode will only be as up-to-date as how
+ often the logs are rolled. Note that failover triggers a log roll
+ so the StandbyNode will be up to date before it becomes active.
+ </description>
+</property>
+
+<property>
+ <name>dfs.ha.tail-edits.period</name>
+ <value>60</value>
+ <description>
+ How often, in seconds, the StandbyNode should check for new
+ finalized log segments in the shared edits log.
+ </description>
+</property>
+
+<property>
+ <name>dfs.ha.automatic-failover.enabled</name>
+ <value>false</value>
+ <description>
+ Whether automatic failover is enabled. See the HDFS High
+ Availability documentation for details on automatic HA
+ configuration.
+ </description>
+</property>
+
+<property>
+ <name>dfs.support.append</name>
+ <value>true</value>
+ <description>
+ Does HDFS allow appends to files?
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.use.datanode.hostname</name>
+ <value>false</value>
+ <description>Whether clients should use datanode hostnames when
+ connecting to datanodes.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.use.datanode.hostname</name>
+ <value>false</value>
+ <description>Whether datanodes should use datanode hostnames when
+ connecting to other datanodes for data transfer.
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.local.interfaces</name>
+ <value></value>
+ <description>A comma separated list of network interface names to use
+ for data transfer between the client and datanodes. When creating
+ a connection to read from or write to a datanode, the client
+ chooses one of the specified interfaces at random and binds its
+ socket to the IP of that interface. Individual names may be
+ specified as either an interface name (eg "eth0"), a subinterface
+ name (eg "eth0:0"), or an IP address (which may be specified using
+ CIDR notation to match a range of IPs).
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.kerberos.internal.spnego.principal</name>
+ <value>${dfs.web.authentication.kerberos.principal}</value>
+</property>
+
+<property>
+ <name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name>
+ <value>${dfs.web.authentication.kerberos.principal}</value>
+</property>
+
+<property>
+ <name>dfs.namenode.avoid.read.stale.datanode</name>
+ <value>false</value>
+ <description>
+ Indicate whether or not to avoid reading from "stale" datanodes whose
+ heartbeat messages have not been received by the namenode
+ for more than a specified time interval. Stale datanodes will be
+ moved to the end of the node list returned for reading. See
+ dfs.namenode.avoid.write.stale.datanode for a similar setting for writes.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.avoid.write.stale.datanode</name>
+ <value>false</value>
+ <description>
+ Indicate whether or not to avoid writing to "stale" datanodes whose
+ heartbeat messages have not been received by the namenode
+ for more than a specified time interval. Writes will avoid using
+ stale datanodes unless more than a configured ratio
+ (dfs.namenode.write.stale.datanode.ratio) of datanodes are marked as
+ stale. See dfs.namenode.avoid.read.stale.datanode for a similar setting
+ for reads.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.stale.datanode.interval</name>
+ <value>30000</value>
+ <description>
+ Default time interval for marking a datanode as "stale", i.e., if
+ the namenode has not received heartbeat msg from a datanode for
+ more than this time interval, the datanode will be marked and treated
+ as "stale" by default. The stale interval cannot be too small since
+ otherwise this may cause too frequent change of stale states.
+ We thus set a minimum stale interval value (the default value is 3 times
+ of heartbeat interval) and guarantee that the stale interval cannot be less
+ than the minimum value. A stale data node is avoided during lease/block
+ recovery. It can be conditionally avoided for reads (see
+ dfs.namenode.avoid.read.stale.datanode) and for writes (see
+ dfs.namenode.avoid.write.stale.datanode).
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.write.stale.datanode.ratio</name>
+ <value>0.5f</value>
+ <description>
+ When the ratio of the number of stale datanodes to the total number of
+ datanodes marked is greater than this ratio, stop avoiding writing to stale nodes so
+ as to prevent causing hotspots.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.invalidate.work.pct.per.iteration</name>
+ <value>0.32f</value>
+ <description>
+ *Note*: Advanced property. Change with caution.
+ This determines the percentage amount of block
+ invalidations (deletes) to do over a single DN heartbeat
+ deletion command. The final deletion count is determined by applying this
+ percentage to the number of live nodes in the system.
+ The resultant number is the number of blocks from the deletion list
+ chosen for proper invalidation over a single heartbeat of a single DN.
+ Value should be a positive, non-zero percentage in float notation (X.Yf),
+ with 1.0f meaning 100%.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.replication.work.multiplier.per.iteration</name>
+ <value>2</value>
+ <description>
+ *Note*: Advanced property. Change with caution.
+ This determines the total amount of block transfers to begin in
+ parallel at a DN, for replication, when such a command list is being
+ sent over a DN heartbeat by the NN. The actual number is obtained by
+ multiplying this multiplier with the total number of live nodes in the
+ cluster. The result number is the number of blocks to begin transfers
+ immediately for, per DN heartbeat. This number can be any positive,
+ non-zero integer.
+ </description>
+</property>
+
+<property>
+ <name>dfs.webhdfs.enabled</name>
+ <value>true</value>
+ <description>
+ Enable WebHDFS (REST API) in Namenodes and Datanodes.
+ </description>
+</property>
+
+<property>
+ <name>hadoop.fuse.connection.timeout</name>
+ <value>300</value>
+ <description>
+ The minimum number of seconds that we'll cache libhdfs connection objects
+ in fuse_dfs. Lower values will result in lower memory consumption; higher
+ values may speed up access by avoiding the overhead of creating new
+ connection objects.
+ </description>
+</property>
+
+<property>
+ <name>hadoop.fuse.timer.period</name>
+ <value>5</value>
+ <description>
+ The number of seconds between cache expiry checks in fuse_dfs. Lower values
+ will result in fuse_dfs noticing changes to Kerberos ticket caches more
+ quickly.
+ </description>
+</property>
+
+<property>
+ <name>dfs.metrics.percentiles.intervals</name>
+ <value></value>
+ <description>
+ Comma-delimited set of integers denoting the desired rollover intervals
+ (in seconds) for percentile latency metrics on the Namenode and Datanode.
+ By default, percentile latency metrics are disabled.
+ </description>
+</property>
+
+<property>
+ <name>dfs.encrypt.data.transfer</name>
+ <value>false</value>
+ <description>
+ Whether or not actual block data that is read/written from/to HDFS should
+ be encrypted on the wire. This only needs to be set on the NN and DNs,
+ clients will deduce this automatically.
+ </description>
+</property>
+
+<property>
+ <name>dfs.encrypt.data.transfer.algorithm</name>
+ <value></value>
+ <description>
+ This value may be set to either "3des" or "rc4". If nothing is set, then
+ the configured JCE default on the system is used (usually 3DES.) It is
+ widely believed that 3DES is more cryptographically secure, but RC4 is
+ substantially faster.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
+ <value>false</value>
+ <description>
+ Boolean which enables backend datanode-side support for the experimental DistributedFileSystem#getFileBlockStorageLocations API.
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.file-block-storage-locations.num-threads</name>
+ <value>10</value>
+ <description>
+ Number of threads used for making parallel RPCs in DistributedFileSystem#getFileBlockStorageLocations().
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.file-block-storage-locations.timeout</name>
+ <value>60</value>
+ <description>
+ Timeout (in seconds) for the parallel RPCs made in DistributedFileSystem#getFileBlockStorageLocations().
+ </description>
+</property>
+
+<property>
+ <name>dfs.journalnode.rpc-address</name>
+ <value>0.0.0.0:8485</value>
+ <description>
+ The JournalNode RPC server address and port.
+ </description>
+</property>
+
+<property>
+ <name>dfs.journalnode.http-address</name>
+ <value>0.0.0.0:8480</value>
+ <description>
+ The address and port the JournalNode web UI listens on.
+ If the port is 0 then the server will start on a free port.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.audit.loggers</name>
+ <value>default</value>
+ <description>
+ List of classes implementing audit loggers that will receive audit events.
+ These should be implementations of org.apache.hadoop.hdfs.server.namenode.AuditLogger.
+ The special value "default" can be used to reference the default audit
+ logger, which uses the configured log system. Installing custom audit loggers
+ may affect the performance and stability of the NameNode. Refer to the custom
+ logger's documentation for more details.
+ </description>
+</property>
+
+<property>
+ <name>dfs.domain.socket.path</name>
+ <value></value>
+ <description>
+ Optional. This is a path to a UNIX domain socket that will be used for
+ communication between the DataNode and local HDFS clients.
+ If the string "_PORT" is present in this path, it will be replaced by the
+ TCP port of the DataNode.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold</name>
+ <value>10737418240</value> <!-- 10 GB -->
+ <description>
+ Only used when the dfs.datanode.fsdataset.volume.choosing.policy is set to
+ org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy.
+ This setting controls how much DN volumes are allowed to differ in terms of
+ bytes of free disk space before they are considered imbalanced. If the free
+ space of all the volumes are within this range of each other, the volumes
+ will be considered balanced and block assignments will be done on a pure
+ round robin basis.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction</name>
+ <value>0.75f</value>
+ <description>
+ Only used when the dfs.datanode.fsdataset.volume.choosing.policy is set to
+ org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy.
+ This setting controls what percentage of new block allocations will be sent
+ to volumes with more available disk space than others. This setting should
+ be in the range 0.0 - 1.0, though in practice 0.5 - 1.0, since there should
+ be no reason to prefer that volumes with less available disk space receive
+ more block allocations.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.edits.noeditlogchannelflush</name>
+ <value>false</value>
+ <description>
+ Specifies whether to flush edit log file channel. When set, expensive
+ FileChannel#force calls are skipped and synchronous disk writes are
+ enabled instead by opening the edit log file with RandomAccessFile("rws")
+ flags. This can significantly improve the performance of edit log writes
+ on the Windows platform.
+ Note that the behavior of the "rws" flags is platform and hardware specific
+ and might not provide the same level of guarantees as FileChannel#force.
+ For example, the write will skip the disk-cache on SAS and SCSI devices
+ while it might not on SATA devices. This is an expert level setting,
+ change with caution.
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.cache.drop.behind.writes</name>
+ <value></value>
+ <description>
+ Just like dfs.datanode.drop.cache.behind.writes, this setting causes the
+ page cache to be dropped behind HDFS writes, potentially freeing up more
+ memory for other uses. Unlike dfs.datanode.drop.cache.behind.writes, this
+ is a client-side setting rather than a setting for the entire datanode.
+ If present, this setting will override the DataNode default.
+
+ If the native libraries are not available to the DataNode, this
+ configuration has no effect.
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.cache.drop.behind.reads</name>
+ <value></value>
+ <description>
+ Just like dfs.datanode.drop.cache.behind.reads, this setting causes the
+ page cache to be dropped behind HDFS reads, potentially freeing up more
+ memory for other uses. Unlike dfs.datanode.drop.cache.behind.reads, this
+ is a client-side setting rather than a setting for the entire datanode. If
+ present, this setting will override the DataNode default.
+
+ If the native libraries are not available to the DataNode, this
+ configuration has no effect.
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.cache.readahead</name>
+ <value></value>
+ <description>
+ When using remote reads, this setting causes the datanode to
+ read ahead in the block file using posix_fadvise, potentially decreasing
+ I/O wait times. Unlike dfs.datanode.readahead.bytes, this is a client-side
+ setting rather than a setting for the entire datanode. If present, this
+ setting will override the DataNode default.
+
+ When using local reads, this setting determines how much readahead we do in
+ BlockReaderLocal.
+
+ If the native libraries are not available to the DataNode, this
+ configuration has no effect.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.enable.retrycache</name>
+ <value>true</value>
+ <description>
+ This enables the retry cache on the namenode. Namenode tracks for
+ non-idempotent requests the corresponding response. If a client retries the
+ request, the response from the retry cache is sent. Such operations
+ are tagged with annotation @AtMostOnce in namenode protocols. It is
+ recommended that this flag be set to true. Setting it to false, will result
+ in clients getting failure responses to retried request. This flag must
+ be enabled in HA setup for transparent fail-overs.
+
+ The entries in the cache have expiration time configurable
+ using dfs.namenode.retrycache.expirytime.millis.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.retrycache.expirytime.millis</name>
+ <value>600000</value>
+ <description>
+ The time for which retry cache entries are retained.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.retrycache.heap.percent</name>
+ <value>0.03f</value>
+ <description>
+ This parameter configures the heap size allocated for retry cache
+ (excluding the response cached). This corresponds to approximately
+ 4096 entries for every 64MB of namenode process java heap size.
+ Assuming retry cache entry expiration time (configured using
+ dfs.namenode.retrycache.expirytime.millis) of 10 minutes, this
+ enables retry cache to support 7 operations per second sustained
+ for 10 minutes. As the heap size is increased, the operation rate
+ linearly increases.
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.mmap.cache.size</name>
+ <value>1024</value>
+ <description>
+ When zero-copy reads are used, the DFSClient keeps a cache of recently used
+ memory mapped regions. This parameter controls the maximum number of
+ entries that we will keep in that cache.
+
+ If this is set to 0, we will not allow mmap.
+
+ The larger this number is, the more file descriptors we will potentially
+ use for memory-mapped files. mmaped files also use virtual address space.
+ You may need to increase your ulimit virtual address space limits before
+ increasing the client mmap cache size.
+ </description>
+</property>
+
+<property>
+ <name>dfs.client.mmap.cache.timeout.ms</name>
+ <value>900000</value>
+ <description>
+ The minimum length of time that we will keep an mmap entry in the cache
+ between uses. If an entry is in the cache longer than this, and nobody
+ uses it, it will be removed by a background thread.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.path.based.cache.block.map.allocation.percent</name>
+ <value>0.25</value>
+ <description>
+ The percentage of the Java heap which we will allocate to the cached blocks
+ map. The cached blocks map is a hash map which uses chained hashing.
+ Smaller maps may be accessed more slowly if the number of cached blocks is
+ large; larger maps will consume more memory.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.max.locked.memory</name>
+ <value>0</value>
+ <description>
+ The amount of memory in bytes to use for caching of block replicas in
+ memory on the datanode. The datanode's maximum locked memory soft ulimit
+ (RLIMIT_MEMLOCK) must be set to at least this value, else the datanode
+ will abort on startup.
+
+ By default, this parameter is set to 0, which disables in-memory caching.
+
+ If the native libraries are not available to the DataNode, this
+ configuration has no effect.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.list.cache.directives.num.responses</name>
+ <value>100</value>
+ <description>
+ This value controls the number of cache directives that the NameNode will
+ send over the wire in response to a listDirectives RPC.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.list.cache.pools.num.responses</name>
+ <value>100</value>
+ <description>
+ This value controls the number of cache pools that the NameNode will
+ send over the wire in response to a listPools RPC.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.path.based.cache.refresh.interval.ms</name>
+ <value>300000</value>
+ <description>
+ The amount of milliseconds between subsequent path cache rescans. Path
+ cache rescans are when we calculate which blocks should be cached, and on
+ what datanodes.
+
+ By default, this parameter is set to 300000, which is five minutes.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.path.based.cache.retry.interval.ms</name>
+ <value>60000</value>
+ <description>
+ When the NameNode needs to uncache something that is cached, or cache
+ something that is not cached, it must direct the DataNodes to do so by
+ sending a DNA_CACHE or DNA_UNCACHE command in response to a DataNode
+ heartbeat. This parameter controls how frequently the NameNode will
+ resend these commands.
+ </description>
+</property>
+
+<property>
+ <name>dfs.datanode.fsdatasetcache.max.threads.per.volume</name>
+ <value>4</value>
+ <description>
+ The maximum number of threads per volume to use for caching new data
+ on the datanode. These threads consume both I/O and CPU. This can affect
+ normal datanode operations.
+ </description>
+</property>
+
+<property>
+ <name>dfs.cachereport.intervalMsec</name>
+ <value>10000</value>
+ <description>
+ Determines cache reporting interval in milliseconds. After this amount of
+ time, the DataNode sends a full report of its cache state to the NameNode.
+ The NameNode uses the cache report to update its map of cached blocks to
+ DataNode locations.
+
+ This configuration has no effect if in-memory caching has been disabled by
+ setting dfs.datanode.max.locked.memory to 0 (which is the default).
+
+ If the native libraries are not available to the DataNode, this
+ configuration has no effect.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.edit.log.autoroll.multiplier.threshold</name>
+ <value>2.0</value>
+ <description>
+ Determines when an active namenode will roll its own edit log.
+ The actual threshold (in number of edits) is determined by multiplying
+ this value by dfs.namenode.checkpoint.txns.
+
+ This prevents extremely large edit files from accumulating on the active
+ namenode, which can cause timeouts during namenode startup and pose an
+ administrative hassle. This behavior is intended as a failsafe for when
+ the standby or secondary namenode fail to roll the edit log by the normal
+ checkpoint threshold.
+ </description>
+</property>
+
+<property>
+ <name>dfs.namenode.edit.log.autoroll.check.interval.ms</name>
+ <value>300000</value>
+ <description>
+ How often an active namenode will check if it needs to roll its edit log,
+ in milliseconds.
+ </description>
+</property>
+
+<property>
+ <name>dfs.webhdfs.user.provider.user.pattern</name>
+ <value>^[A-Za-z_][A-Za-z0-9._-]*[$]?$</value>
+ <description>
+ Valid pattern for user and group names for webhdfs, it must be a valid java regex.
+ </description>
+</property>
+
+</configuration>
http://git-wip-us.apache.org/repos/asf/camel/blob/cd101646/tests/camel-itest-osgi/src/test/resources/org/apache/camel/itest/osgi/hdfs/blueprintCamelContext.xml
----------------------------------------------------------------------
diff --git a/tests/camel-itest-osgi/src/test/resources/org/apache/camel/itest/osgi/hdfs/blueprintCamelContext.xml b/tests/camel-itest-osgi/src/test/resources/org/apache/camel/itest/osgi/hdfs/blueprintCamelContext.xml
index 7f82450..a905acf 100644
--- a/tests/camel-itest-osgi/src/test/resources/org/apache/camel/itest/osgi/hdfs/blueprintCamelContext.xml
+++ b/tests/camel-itest-osgi/src/test/resources/org/apache/camel/itest/osgi/hdfs/blueprintCamelContext.xml
@@ -28,11 +28,13 @@
Bean created as prerequisite for camel-hdfs2 component. It's role is to properly initialize Hadoop's internal
map of file systems which is normally (non-OSGi) initialized using java.util.ServiceLoader.
In OSGi ServiceLoader doesn't see *all* JARs to scan for services.
+ The map must be initialized with correct URIs - not pure schemes + "://", as the URI is then used to
+ initialize particular hadoop filesystem initialization which needs more URI components than just scheme.
</description>
<argument>
<map>
- <entry key="file" value="org.apache.hadoop.fs.LocalFileSystem" />
- <entry key="hdfs" value="org.apache.hadoop.hdfs.DistributedFileSystem" />
+ <entry key="file:///" value="org.apache.hadoop.fs.LocalFileSystem" />
+ <entry key="hdfs://localhost:9000" value="org.apache.hadoop.hdfs.DistributedFileSystem" />
</map>
</argument>
</bean>
@@ -46,9 +48,11 @@
propertyPrefix="prefix."/>
<route>
<from uri="direct:start"/>
+<!-- <to uri="hdfs2://localhost:9000/hdfs/test-camel?fileSystemType=HDFS&splitStrategy=BYTES:5,IDLE:1000"/> -->
<to uri="hdfs2://[[karaf.base]]/hdfs/test-camel?fileSystemType=LOCAL&splitStrategy=BYTES:5,IDLE:1000"/>
</route>
<route>
+<!-- <from uri="hdfs2://localhost:9000/hdfs/test-camel?pattern=*&initialDelay=2000&fileSystemType=HDFS&chunkSize=5"/> -->
<from uri="hdfs2://[[karaf.base]]/hdfs/test-camel?pattern=*&initialDelay=2000&fileSystemType=LOCAL&chunkSize=5"/>
<to uri="mock:result"/>
</route>