You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hbase.apache.org by "mingkeming (JIRA)" <ji...@apache.org> on 2010/02/17 11:21:27 UTC
[jira] Created: (HBASE-2232) Can not create insert more than half
million rows
Can not create insert more than half million rows
--------------------------------------------------
Key: HBASE-2232
URL: https://issues.apache.org/jira/browse/HBASE-2232
Project: Hadoop HBase
Issue Type: Bug
Components: client, io, master, regionserver
Affects Versions: 0.20.3
Environment: Linux
Reporter: mingkeming
The following code attempts to insert a certain number of rows into a table.
This is running with hbase 0.20.3 after downloading without changing config.
It does not work if there are more than 500K or so rows.
I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number.
But the data disappears once it gets files of around 16MB in size.
I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.
import java.io.*;
import java.util.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
public class Test{
/**
 * Creates the table if it does not yet exist and returns a client handle
 * configured for buffered (batched) writes.
 *
 * @param config         HBase client configuration
 * @param tableName      name of the table to open (created on demand)
 * @param columnFamilies column families to create the table with
 * @return an {@link HTable} with auto-flush disabled and a 12 MB write buffer
 * @throws IOException if table creation or opening fails
 */
public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
    // Make sure the table exists before opening a handle to it.
    createTable(new HBaseAdmin(config), tableName, columnFamilies);
    HTable handle = new HTable(config, tableName);
    // Buffer puts client-side and ship them to the server in 12 MB batches.
    handle.setAutoFlush(false);
    handle.setWriteBufferSize(12 * 1024 * 1024);
    return handle;
}
/**
 * Creates {@code tableName} with the given column families (one version kept
 * per cell) unless the table already exists.
 *
 * @param admin          admin handle used to check for and create the table
 * @param tableName      name of the table to create
 * @param columnFamilies names of the column families to add
 * @return {@code true} if the table was created, {@code false} if it already existed
 * @throws IOException if the admin operation fails
 */
public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
    if(admin.tableExists(tableName)) return false;
    HTableDescriptor desc = new HTableDescriptor(tableName);
    for(String s : columnFamilies){
        // Bytes.toBytes encodes as UTF-8, matching the rest of this class;
        // the previous s.getBytes() depended on the platform default charset.
        HColumnDescriptor col = new HColumnDescriptor(Bytes.toBytes(s));
        col.setMaxVersions(1);
        desc.addFamily(col);
    }
    admin.createTable(desc);
    return true;
}
/**
 * Serially inserts {@code count} rows; the row key, qualifier and value of
 * each row are all the 4-byte big-endian encoding of the loop index.
 * Progress is printed every million rows and the client write buffer is
 * flushed at the end.
 *
 * @param table  table to write to (expected to have auto-flush disabled)
 * @param family column family to write into
 * @param count  number of rows to insert
 * @throws IOException if a put or the final flush fails
 */
public static void test_serial_insert(HTable table, String family, int count)throws IOException{
    byte[] bf = Bytes.toBytes(family);
    for(int i = 0; i < count; i++){
        // One encoding of i serves as key, qualifier and value; the array is
        // never mutated, so sharing it is safe. (The unused local `id` from
        // the original was removed.)
        byte[] encoded = Bytes.toBytes(i);
        Put put = new Put(encoded);
        put.setWriteToWAL(false); // skip the WAL for speed; data is lost on a region server crash
        // NOTE(review): explicit timestamp 0 — presumably deliberate for this
        // test, but any later write with a real timestamp will shadow these
        // cells; confirm this is intended.
        put.add(bf, encoded, 0, encoded);
        table.put(put);
        if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 + " M"); }
    }
    table.flushCommits(); // push puts still sitting in the client-side buffer
}
/**
 * Scans the entire table, printing a running count every 10000 rows and the
 * final total.
 *
 * @param table table to scan
 * @throws IOException if the scan fails
 */
public static void count(HTable table)throws IOException{
    ResultScanner scanner = table.getScanner(new Scan());
    int i = 0;
    try{
        Result result;
        while( (result = scanner.next()) != null ){
            ++i;
            if(i % 10000 == 0) System.out.println(i);
        }
    }finally{
        // The original leaked the scanner; always release its server-side
        // resources, even if next() throws.
        scanner.close();
    }
    System.out.println("TOTAL========== "+i);
}
/**
 * Drops {@code tableName} if it exists (a table must be disabled before it
 * can be deleted); does nothing otherwise.
 *
 * @param admin     admin handle used to disable and delete the table
 * @param tableName name of the table to remove
 * @throws IOException if the disable or delete fails
 */
public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
    if(admin.tableExists(tableName)){
        admin.disableTable(tableName);
        admin.deleteTable(tableName);
    }
}
/**
 * Entry point. With no arguments it recreates TestTable, inserts one million
 * rows and counts them. Passing "read" as the first argument skips the
 * insert; passing a number N inserts N*1000 rows instead.
 *
 * @param args optional: "read", or the row count in thousands
 * @throws Exception on any HBase or parsing failure
 */
public static void main(String[] args)throws Exception{
    int thousands = 1000; // default: 1000 * 1000 = one million rows
    boolean doInsert = true;
    if(args.length > 0){
        if("read".equals(args[0])){
            doInsert = false;
        }else{
            thousands = Integer.parseInt(args[0]);
        }
    }
    HBaseConfiguration config = new HBaseConfiguration();
    String tableName = "TestTable";
    String familyName = "test_family";
    // Start from a clean slate: drop any leftover table from a previous run.
    removeTable(new HBaseAdmin(config), tableName);
    HTable table = getTable(config, tableName, new String[]{familyName});
    if(doInsert) test_serial_insert(table, familyName, thousands * 1000);
    count(table);
}
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
[jira] Commented: (HBASE-2232) Can not create insert more than
half million rows
Posted by "mingkeming (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/HBASE-2232?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12837131#action_12837131 ]
mingkeming commented on HBASE-2232:
-----------------------------------
Hi, Lars,
Thanks for the comment.
I have downloaded the latest from SVN and it now seems to work ok.
Why should hbase NOT work on local machine ? That seems to be a bug to me.
I am trying to use hbase on a single machine, basically using it as a giant hashmap.
Isn't that what HBase is intended for, i.e., a hashmap? Why should I need at least 2 machines to be able to use it? Is there any reason why it is OK for HBase to fail when used on 1 machine?
Thanks
> Can not create insert more than half million rows
> --------------------------------------------------
>
> Key: HBASE-2232
> URL: https://issues.apache.org/jira/browse/HBASE-2232
> Project: Hadoop HBase
> Issue Type: Bug
> Components: client, io, master, regionserver
> Affects Versions: 0.20.3
> Environment: Linux
> Reporter: mingkeming
>
> The following code, which attempts to insert certain number of rows into a table.
> This is running with hbase 0.20.3 after downloading without changing config.
> Does not work if there is more than 500K or so rows .
> I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number.
> But the data disappear once it get 3 files of size around 16MB.
> I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.
> To create 2Million rows , run it as java Test 2000
> To create 10 Millions rows, run it as java Test 10000
> import java.io.*;
> import java.util.*;
> import org.apache.hadoop.hbase.*;
> import org.apache.hadoop.hbase.io.BatchUpdate;
> import org.apache.hadoop.hbase.client.Get;
> import org.apache.hadoop.hbase.client.HBaseAdmin;
> import org.apache.hadoop.hbase.client.HTable;
> import org.apache.hadoop.hbase.client.Put;
> import org.apache.hadoop.hbase.client.Result;
> import org.apache.hadoop.hbase.client.ResultScanner;
> import org.apache.hadoop.hbase.client.Scan;
> import org.apache.hadoop.hbase.util.Bytes;
> public class Test{
> public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
> HBaseAdmin admin = new HBaseAdmin(config);
> createTable(admin, tableName, columnFamilies);
> HTable table = new HTable(config, tableName);
> table.setAutoFlush(false);
> table.setWriteBufferSize(1024*1024*12);
> return table;
> }
> public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
> if(admin.tableExists(tableName))return false;
> HTableDescriptor desc = new HTableDescriptor(tableName);
> for(String s : columnFamilies){
> HColumnDescriptor col = new HColumnDescriptor(s.getBytes());
> col.setMaxVersions(1);
> desc.addFamily(col);
> }
> admin.createTable(desc);
> return true;
> }
>
> public static void test_serial_insert(HTable table, String family, int count)throws IOException{
> byte[] bf = Bytes.toBytes(family);
> for(int i = 0; i < count; i++){
> int id = i;
> byte[] qualifier = Bytes.toBytes(i); // "i"
> byte[] key = Bytes.toBytes(i);
> byte[] val = Bytes.toBytes(i);
> Put put = new Put(key);
> put.setWriteToWAL(false);
> put.add(bf, qualifier, 0, val);
> table.put(put);
> if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 + " M"); }
> }
> table.flushCommits();
> }
> public static void count(HTable table)throws IOException{
> Scan scan = new Scan();
> ResultScanner scanner = table.getScanner(scan);
> Result result = null;
> int i = 0;
> while( (result = scanner.next()) != null ){
> byte[] key = result.getRow();
> ++i;
> if(i % 10000 == 0)System.out.println(i);
> }
> System.out.println("TOTAL========== "+i);
> }
> public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
> if(!admin.tableExists(tableName))return;
> admin.disableTable(tableName);
> admin.deleteTable(tableName);
> }
>
> public static void main(String[] args)throws Exception{
> int k = 1000;
> boolean insert = true;
> if(args.length > 0){
> if("read".equals(args[0]))insert = false;
> else k = Integer.parseInt(args[0]);
> }
>
> HBaseConfiguration config = new HBaseConfiguration();
> String tableName = "TestTable";
> String familyName = "test_family";
> HBaseAdmin admin = new HBaseAdmin(config);
> removeTable(admin, tableName);
> HTable table = getTable(config, tableName, new String[]{familyName});
> if(insert)test_serial_insert(table, familyName, k*1000);
> count(table);
> }
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
[jira] Commented: (HBASE-2232) Can not create insert more than
half million rows
Posted by "Lars George (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/HBASE-2232?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12834912#action_12834912 ]
Lars George commented on HBASE-2232:
------------------------------------
Hey, are you saying that you did this on a local install, i.e. running everything on one machine, not on a cluster? If so, then it is no wonder it does not work, as local mode is for prototyping only. Please give us a few more details on your setup.
> Can not create insert more than half million rows
> --------------------------------------------------
>
> Key: HBASE-2232
> URL: https://issues.apache.org/jira/browse/HBASE-2232
> Project: Hadoop HBase
> Issue Type: Bug
> Components: client, io, master, regionserver
> Affects Versions: 0.20.3
> Environment: Linux
> Reporter: mingkeming
>
> The following code, which attempts to insert certain number of rows into a table.
> This is running with hbase 0.20.3 after downloading without changing config.
> Does not work if there is more than 500K or so rows .
> I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number.
> But the data disappear once it get 3 files of size around 16MB.
> I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.
> To create 2Million rows , run it as java Test 2000
> To create 10 Millions rows, run it as java Test 10000
> import java.io.*;
> import java.util.*;
> import org.apache.hadoop.hbase.*;
> import org.apache.hadoop.hbase.io.BatchUpdate;
> import org.apache.hadoop.hbase.client.Get;
> import org.apache.hadoop.hbase.client.HBaseAdmin;
> import org.apache.hadoop.hbase.client.HTable;
> import org.apache.hadoop.hbase.client.Put;
> import org.apache.hadoop.hbase.client.Result;
> import org.apache.hadoop.hbase.client.ResultScanner;
> import org.apache.hadoop.hbase.client.Scan;
> import org.apache.hadoop.hbase.util.Bytes;
> public class Test{
> public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
> HBaseAdmin admin = new HBaseAdmin(config);
> createTable(admin, tableName, columnFamilies);
> HTable table = new HTable(config, tableName);
> table.setAutoFlush(false);
> table.setWriteBufferSize(1024*1024*12);
> return table;
> }
> public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
> if(admin.tableExists(tableName))return false;
> HTableDescriptor desc = new HTableDescriptor(tableName);
> for(String s : columnFamilies){
> HColumnDescriptor col = new HColumnDescriptor(s.getBytes());
> col.setMaxVersions(1);
> desc.addFamily(col);
> }
> admin.createTable(desc);
> return true;
> }
>
> public static void test_serial_insert(HTable table, String family, int count)throws IOException{
> byte[] bf = Bytes.toBytes(family);
> for(int i = 0; i < count; i++){
> int id = i;
> byte[] qualifier = Bytes.toBytes(i); // "i"
> byte[] key = Bytes.toBytes(i);
> byte[] val = Bytes.toBytes(i);
> Put put = new Put(key);
> put.setWriteToWAL(false);
> put.add(bf, qualifier, 0, val);
> table.put(put);
> if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 + " M"); }
> }
> table.flushCommits();
> }
> public static void count(HTable table)throws IOException{
> Scan scan = new Scan();
> ResultScanner scanner = table.getScanner(scan);
> Result result = null;
> int i = 0;
> while( (result = scanner.next()) != null ){
> byte[] key = result.getRow();
> ++i;
> if(i % 10000 == 0)System.out.println(i);
> }
> System.out.println("TOTAL========== "+i);
> }
> public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
> if(!admin.tableExists(tableName))return;
> admin.disableTable(tableName);
> admin.deleteTable(tableName);
> }
>
> public static void main(String[] args)throws Exception{
> int k = 1000;
> boolean insert = true;
> if(args.length > 0){
> if("read".equals(args[0]))insert = false;
> else k = Integer.parseInt(args[0]);
> }
>
> HBaseConfiguration config = new HBaseConfiguration();
> String tableName = "TestTable";
> String familyName = "test_family";
> HBaseAdmin admin = new HBaseAdmin(config);
> removeTable(admin, tableName);
> HTable table = getTable(config, tableName, new String[]{familyName});
> if(insert)test_serial_insert(table, familyName, k*1000);
> count(table);
> }
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
[jira] Resolved: (HBASE-2232) Can not create insert more than half
million rows
Posted by "mingkeming (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/HBASE-2232?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
mingkeming resolved HBASE-2232.
-------------------------------
Resolution: Cannot Reproduce
Fix Version/s: 0.21.0
Cannot reproduce the problem in the latest SVN version.
> Can not create insert more than half million rows
> --------------------------------------------------
>
> Key: HBASE-2232
> URL: https://issues.apache.org/jira/browse/HBASE-2232
> Project: Hadoop HBase
> Issue Type: Bug
> Components: client, io, master, regionserver
> Affects Versions: 0.20.3
> Environment: Linux
> Reporter: mingkeming
> Fix For: 0.21.0
>
>
> The following code, which attempts to insert certain number of rows into a table.
> This is running with hbase 0.20.3 after downloading without changing config.
> Does not work if there is more than 500K or so rows .
> I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number.
> But the data disappear once it get 3 files of size around 16MB.
> I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.
> To create 2Million rows , run it as java Test 2000
> To create 10 Millions rows, run it as java Test 10000
> import java.io.*;
> import java.util.*;
> import org.apache.hadoop.hbase.*;
> import org.apache.hadoop.hbase.io.BatchUpdate;
> import org.apache.hadoop.hbase.client.Get;
> import org.apache.hadoop.hbase.client.HBaseAdmin;
> import org.apache.hadoop.hbase.client.HTable;
> import org.apache.hadoop.hbase.client.Put;
> import org.apache.hadoop.hbase.client.Result;
> import org.apache.hadoop.hbase.client.ResultScanner;
> import org.apache.hadoop.hbase.client.Scan;
> import org.apache.hadoop.hbase.util.Bytes;
> public class Test{
> public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
> HBaseAdmin admin = new HBaseAdmin(config);
> createTable(admin, tableName, columnFamilies);
> HTable table = new HTable(config, tableName);
> table.setAutoFlush(false);
> table.setWriteBufferSize(1024*1024*12);
> return table;
> }
> public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
> if(admin.tableExists(tableName))return false;
> HTableDescriptor desc = new HTableDescriptor(tableName);
> for(String s : columnFamilies){
> HColumnDescriptor col = new HColumnDescriptor(s.getBytes());
> col.setMaxVersions(1);
> desc.addFamily(col);
> }
> admin.createTable(desc);
> return true;
> }
>
> public static void test_serial_insert(HTable table, String family, int count)throws IOException{
> byte[] bf = Bytes.toBytes(family);
> for(int i = 0; i < count; i++){
> int id = i;
> byte[] qualifier = Bytes.toBytes(i); // "i"
> byte[] key = Bytes.toBytes(i);
> byte[] val = Bytes.toBytes(i);
> Put put = new Put(key);
> put.setWriteToWAL(false);
> put.add(bf, qualifier, 0, val);
> table.put(put);
> if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 + " M"); }
> }
> table.flushCommits();
> }
> public static void count(HTable table)throws IOException{
> Scan scan = new Scan();
> ResultScanner scanner = table.getScanner(scan);
> Result result = null;
> int i = 0;
> while( (result = scanner.next()) != null ){
> byte[] key = result.getRow();
> ++i;
> if(i % 10000 == 0)System.out.println(i);
> }
> System.out.println("TOTAL========== "+i);
> }
> public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
> if(!admin.tableExists(tableName))return;
> admin.disableTable(tableName);
> admin.deleteTable(tableName);
> }
>
> public static void main(String[] args)throws Exception{
> int k = 1000;
> boolean insert = true;
> if(args.length > 0){
> if("read".equals(args[0]))insert = false;
> else k = Integer.parseInt(args[0]);
> }
>
> HBaseConfiguration config = new HBaseConfiguration();
> String tableName = "TestTable";
> String familyName = "test_family";
> HBaseAdmin admin = new HBaseAdmin(config);
> removeTable(admin, tableName);
> HTable table = getTable(config, tableName, new String[]{familyName});
> if(insert)test_serial_insert(table, familyName, k*1000);
> count(table);
> }
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
[jira] Updated: (HBASE-2232) Can not create insert more than half
million rows
Posted by "mingkeming (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/HBASE-2232?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
mingkeming updated HBASE-2232:
------------------------------
Description:
The following code, which attempts to insert certain number of rows into a table.
This is running with hbase 0.20.3 after downloading without changing config.
Does not work if there is more than 500K or so rows .
I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number.
But the data disappear once it get 3 files of size around 16MB.
I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.
import java.io.*;
import java.util.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
public class Test{
public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
HBaseAdmin admin = new HBaseAdmin(config);
createTable(admin, tableName, columnFamilies);
HTable table = new HTable(config, tableName);
table.setAutoFlush(false);
table.setWriteBufferSize(1024*1024*12);
return table;
}
public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
if(admin.tableExists(tableName))return false;
HTableDescriptor desc = new HTableDescriptor(tableName);
for(String s : columnFamilies){
HColumnDescriptor col = new HColumnDescriptor(s.getBytes());
col.setMaxVersions(1);
desc.addFamily(col);
}
admin.createTable(desc);
return true;
}
public static void test_serial_insert(HTable table, String family, int count)throws IOException{
byte[] bf = Bytes.toBytes(family);
for(int i = 0; i < count; i++){
int id = i;
byte[] qualifier = Bytes.toBytes(i); // "i"
byte[] key = Bytes.toBytes(i);
byte[] val = Bytes.toBytes(i);
Put put = new Put(key);
put.setWriteToWAL(false);
put.add(bf, qualifier, 0, val);
table.put(put);
if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 + " M"); }
}
table.flushCommits();
}
public static void count(HTable table)throws IOException{
Scan scan = new Scan();
ResultScanner scanner = table.getScanner(scan);
Result result = null;
int i = 0;
while( (result = scanner.next()) != null ){
byte[] key = result.getRow();
++i;
if(i % 10000 == 0)System.out.println(i);
}
System.out.println("TOTAL========== "+i);
}
public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
if(!admin.tableExists(tableName))return;
admin.disableTable(tableName);
admin.deleteTable(tableName);
}
public static void main(String[] args)throws Exception{
int k = 1000;
boolean insert = true;
if(args.length > 0){
if("read".equals(args[0]))insert = false;
else k = Integer.parseInt(args[0]);
}
HBaseConfiguration config = new HBaseConfiguration();
String tableName = "TestTable";
String familyName = "test_family";
HBaseAdmin admin = new HBaseAdmin(config);
removeTable(admin, tableName);
HTable table = getTable(config, tableName, new String[]{familyName});
if(insert)test_serial_insert(table, familyName, k*1000);
count(table);
}
was:
The following code, which attempts to insert certain number of rows into a table.
This is running with hbase 0.20.3 after downloading without changing config.
Does not work if there is more than 500K or so rows .
I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number.
But the data disappear once it get files of size around 16MB.
I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.
import java.io.*;
import java.util.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
public class Test{
public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
HBaseAdmin admin = new HBaseAdmin(config);
createTable(admin, tableName, columnFamilies);
HTable table = new HTable(config, tableName);
table.setAutoFlush(false);
table.setWriteBufferSize(1024*1024*12);
return table;
}
public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
if(admin.tableExists(tableName))return false;
HTableDescriptor desc = new HTableDescriptor(tableName);
for(String s : columnFamilies){
HColumnDescriptor col = new HColumnDescriptor(s.getBytes());
col.setMaxVersions(1);
desc.addFamily(col);
}
admin.createTable(desc);
return true;
}
public static void test_serial_insert(HTable table, String family, int count)throws IOException{
byte[] bf = Bytes.toBytes(family);
for(int i = 0; i < count; i++){
int id = i;
byte[] qualifier = Bytes.toBytes(i); // "i"
byte[] key = Bytes.toBytes(i);
byte[] val = Bytes.toBytes(i);
Put put = new Put(key);
put.setWriteToWAL(false);
put.add(bf, qualifier, 0, val);
table.put(put);
if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 + " M"); }
}
table.flushCommits();
}
public static void count(HTable table)throws IOException{
Scan scan = new Scan();
ResultScanner scanner = table.getScanner(scan);
Result result = null;
int i = 0;
while( (result = scanner.next()) != null ){
byte[] key = result.getRow();
++i;
if(i % 10000 == 0)System.out.println(i);
}
System.out.println("TOTAL========== "+i);
}
public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
if(!admin.tableExists(tableName))return;
admin.disableTable(tableName);
admin.deleteTable(tableName);
}
public static void main(String[] args)throws Exception{
int k = 1000;
boolean insert = true;
if(args.length > 0){
if("read".equals(args[0]))insert = false;
else k = Integer.parseInt(args[0]);
}
HBaseConfiguration config = new HBaseConfiguration();
String tableName = "TestTable";
String familyName = "test_family";
HBaseAdmin admin = new HBaseAdmin(config);
removeTable(admin, tableName);
HTable table = getTable(config, tableName, new String[]{familyName});
if(insert)test_serial_insert(table, familyName, k*1000);
count(table);
}
> Can not create insert more than half million rows
> --------------------------------------------------
>
> Key: HBASE-2232
> URL: https://issues.apache.org/jira/browse/HBASE-2232
> Project: Hadoop HBase
> Issue Type: Bug
> Components: client, io, master, regionserver
> Affects Versions: 0.20.3
> Environment: Linux
> Reporter: mingkeming
>
> The following code, which attempts to insert certain number of rows into a table.
> This is running with hbase 0.20.3 after downloading without changing config.
> Does not work if there is more than 500K or so rows .
> I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number.
> But the data disappear once it get 3 files of size around 16MB.
> I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.
> import java.io.*;
> import java.util.*;
> import org.apache.hadoop.hbase.*;
> import org.apache.hadoop.hbase.io.BatchUpdate;
> import org.apache.hadoop.hbase.client.Get;
> import org.apache.hadoop.hbase.client.HBaseAdmin;
> import org.apache.hadoop.hbase.client.HTable;
> import org.apache.hadoop.hbase.client.Put;
> import org.apache.hadoop.hbase.client.Result;
> import org.apache.hadoop.hbase.client.ResultScanner;
> import org.apache.hadoop.hbase.client.Scan;
> import org.apache.hadoop.hbase.util.Bytes;
> public class Test{
> public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
> HBaseAdmin admin = new HBaseAdmin(config);
> createTable(admin, tableName, columnFamilies);
> HTable table = new HTable(config, tableName);
> table.setAutoFlush(false);
> table.setWriteBufferSize(1024*1024*12);
> return table;
> }
> public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
> if(admin.tableExists(tableName))return false;
> HTableDescriptor desc = new HTableDescriptor(tableName);
> for(String s : columnFamilies){
> HColumnDescriptor col = new HColumnDescriptor(s.getBytes());
> col.setMaxVersions(1);
> desc.addFamily(col);
> }
> admin.createTable(desc);
> return true;
> }
>
> public static void test_serial_insert(HTable table, String family, int count)throws IOException{
> byte[] bf = Bytes.toBytes(family);
> for(int i = 0; i < count; i++){
> int id = i;
> byte[] qualifier = Bytes.toBytes(i); // "i"
> byte[] key = Bytes.toBytes(i);
> byte[] val = Bytes.toBytes(i);
> Put put = new Put(key);
> put.setWriteToWAL(false);
> put.add(bf, qualifier, 0, val);
> table.put(put);
> if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 + " M"); }
> }
> table.flushCommits();
> }
> public static void count(HTable table)throws IOException{
> Scan scan = new Scan();
> ResultScanner scanner = table.getScanner(scan);
> Result result = null;
> int i = 0;
> while( (result = scanner.next()) != null ){
> byte[] key = result.getRow();
> ++i;
> if(i % 10000 == 0)System.out.println(i);
> }
> System.out.println("TOTAL========== "+i);
> }
> public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
> if(!admin.tableExists(tableName))return;
> admin.disableTable(tableName);
> admin.deleteTable(tableName);
> }
>
> public static void main(String[] args)throws Exception{
> int k = 1000;
> boolean insert = true;
> if(args.length > 0){
> if("read".equals(args[0]))insert = false;
> else k = Integer.parseInt(args[0]);
> }
>
> HBaseConfiguration config = new HBaseConfiguration();
> String tableName = "TestTable";
> String familyName = "test_family";
> HBaseAdmin admin = new HBaseAdmin(config);
> removeTable(admin, tableName);
> HTable table = getTable(config, tableName, new String[]{familyName});
> if(insert)test_serial_insert(table, familyName, k*1000);
> count(table);
> }
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.
[jira] Updated: (HBASE-2232) Can not create insert more than half
million rows
Posted by "mingkeming (JIRA)" <ji...@apache.org>.
[ https://issues.apache.org/jira/browse/HBASE-2232?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
mingkeming updated HBASE-2232:
------------------------------
Description:
The following code, which attempts to insert certain number of rows into a table.
This is running with hbase 0.20.3 after downloading without changing config.
Does not work if there is more than 500K or so rows .
I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number.
But the data disappear once it get 3 files of size around 16MB.
I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.
To create 2Million rows , run it as java Test 2000
To create 10 Millions rows, run it as java Test 10000
import java.io.*;
import java.util.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
public class Test{
public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
HBaseAdmin admin = new HBaseAdmin(config);
createTable(admin, tableName, columnFamilies);
HTable table = new HTable(config, tableName);
table.setAutoFlush(false);
table.setWriteBufferSize(1024*1024*12);
return table;
}
public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
if(admin.tableExists(tableName))return false;
HTableDescriptor desc = new HTableDescriptor(tableName);
for(String s : columnFamilies){
HColumnDescriptor col = new HColumnDescriptor(s.getBytes());
col.setMaxVersions(1);
desc.addFamily(col);
}
admin.createTable(desc);
return true;
}
public static void test_serial_insert(HTable table, String family, int count)throws IOException{
byte[] bf = Bytes.toBytes(family);
for(int i = 0; i < count; i++){
int id = i;
byte[] qualifier = Bytes.toBytes(i); // "i"
byte[] key = Bytes.toBytes(i);
byte[] val = Bytes.toBytes(i);
Put put = new Put(key);
put.setWriteToWAL(false);
put.add(bf, qualifier, 0, val);
table.put(put);
if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 + " M"); }
}
table.flushCommits();
}
public static void count(HTable table)throws IOException{
Scan scan = new Scan();
ResultScanner scanner = table.getScanner(scan);
Result result = null;
int i = 0;
while( (result = scanner.next()) != null ){
byte[] key = result.getRow();
++i;
if(i % 10000 == 0)System.out.println(i);
}
System.out.println("TOTAL========== "+i);
}
public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
if(!admin.tableExists(tableName))return;
admin.disableTable(tableName);
admin.deleteTable(tableName);
}
/**
 * Entry point. With no args (or a numeric arg N) it drops, recreates, and
 * fills TestTable with N*1000 rows, then counts them by scanning. With the
 * single arg "read" it only scans and counts the existing table.
 *
 * @param args optional: "read" to skip inserting, or a row count in thousands
 *             (default 1000, i.e. one million rows)
 * @throws Exception on any HBase or parsing failure
 */
public static void main(String[] args)throws Exception{
    int thousands = 1000;   // rows to insert, in units of 1000
    boolean insert = true;
    if (args.length > 0) {
        if ("read".equals(args[0])) insert = false;
        else thousands = Integer.parseInt(args[0]);
    }
    HBaseConfiguration config = new HBaseConfiguration();
    String tableName = "TestTable";
    String familyName = "test_family";
    HBaseAdmin admin = new HBaseAdmin(config);
    // Fix: only drop/recreate the table when inserting. The original code
    // called removeTable unconditionally, so "read" mode deleted the table
    // first and then scanned a freshly created, empty one.
    if (insert) removeTable(admin, tableName);
    HTable table = getTable(config, tableName, new String[]{familyName});
    if (insert) test_serial_insert(table, familyName, thousands * 1000);
    count(table);
}
was:
The following code, which attempts to insert certain number of rows into a table.
This is running with hbase 0.20.3 after downloading without changing config.
Does not work if there are more than 500K or so rows.
I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number.
But the data disappears once there are 3 files of around 16MB each.
I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.
import java.io.*;
import java.util.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
public class Test{
public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
HBaseAdmin admin = new HBaseAdmin(config);
createTable(admin, tableName, columnFamilies);
HTable table = new HTable(config, tableName);
table.setAutoFlush(false);
table.setWriteBufferSize(1024*1024*12);
return table;
}
public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
if(admin.tableExists(tableName))return false;
HTableDescriptor desc = new HTableDescriptor(tableName);
for(String s : columnFamilies){
HColumnDescriptor col = new HColumnDescriptor(s.getBytes());
col.setMaxVersions(1);
desc.addFamily(col);
}
admin.createTable(desc);
return true;
}
public static void test_serial_insert(HTable table, String family, int count)throws IOException{
byte[] bf = Bytes.toBytes(family);
for(int i = 0; i < count; i++){
int id = i;
byte[] qualifier = Bytes.toBytes(i); // "i"
byte[] key = Bytes.toBytes(i);
byte[] val = Bytes.toBytes(i);
Put put = new Put(key);
put.setWriteToWAL(false);
put.add(bf, qualifier, 0, val);
table.put(put);
if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 + " M"); }
}
table.flushCommits();
}
public static void count(HTable table)throws IOException{
Scan scan = new Scan();
ResultScanner scanner = table.getScanner(scan);
Result result = null;
int i = 0;
while( (result = scanner.next()) != null ){
byte[] key = result.getRow();
++i;
if(i % 10000 == 0)System.out.println(i);
}
System.out.println("TOTAL========== "+i);
}
public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
if(!admin.tableExists(tableName))return;
admin.disableTable(tableName);
admin.deleteTable(tableName);
}
public static void main(String[] args)throws Exception{
int k = 1000;
boolean insert = true;
if(args.length > 0){
if("read".equals(args[0]))insert = false;
else k = Integer.parseInt(args[0]);
}
HBaseConfiguration config = new HBaseConfiguration();
String tableName = "TestTable";
String familyName = "test_family";
HBaseAdmin admin = new HBaseAdmin(config);
removeTable(admin, tableName);
HTable table = getTable(config, tableName, new String[]{familyName});
if(insert)test_serial_insert(table, familyName, k*1000);
count(table);
}
> Can not create insert more than half million rows
> --------------------------------------------------
>
> Key: HBASE-2232
> URL: https://issues.apache.org/jira/browse/HBASE-2232
> Project: Hadoop HBase
> Issue Type: Bug
> Components: client, io, master, regionserver
> Affects Versions: 0.20.3
> Environment: Linux
> Reporter: mingkeming
>
> The following code, which attempts to insert certain number of rows into a table.
> This is running with hbase 0.20.3 after downloading without changing config.
> Does not work if there is more than 500K or so rows .
> I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number.
> But the data disappear once it get 3 files of size around 16MB.
> I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.
> To create 2Million rows , run it as java Test 2000
> To create 10 Millions rows, run it as java Test 10000
> import java.io.*;
> import java.util.*;
> import org.apache.hadoop.hbase.*;
> import org.apache.hadoop.hbase.io.BatchUpdate;
> import org.apache.hadoop.hbase.client.Get;
> import org.apache.hadoop.hbase.client.HBaseAdmin;
> import org.apache.hadoop.hbase.client.HTable;
> import org.apache.hadoop.hbase.client.Put;
> import org.apache.hadoop.hbase.client.Result;
> import org.apache.hadoop.hbase.client.ResultScanner;
> import org.apache.hadoop.hbase.client.Scan;
> import org.apache.hadoop.hbase.util.Bytes;
> public class Test{
> public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
> HBaseAdmin admin = new HBaseAdmin(config);
> createTable(admin, tableName, columnFamilies);
> HTable table = new HTable(config, tableName);
> table.setAutoFlush(false);
> table.setWriteBufferSize(1024*1024*12);
> return table;
> }
> public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
> if(admin.tableExists(tableName))return false;
> HTableDescriptor desc = new HTableDescriptor(tableName);
> for(String s : columnFamilies){
> HColumnDescriptor col = new HColumnDescriptor(s.getBytes());
> col.setMaxVersions(1);
> desc.addFamily(col);
> }
> admin.createTable(desc);
> return true;
> }
>
> public static void test_serial_insert(HTable table, String family, int count)throws IOException{
> byte[] bf = Bytes.toBytes(family);
> for(int i = 0; i < count; i++){
> int id = i;
> byte[] qualifier = Bytes.toBytes(i); // "i"
> byte[] key = Bytes.toBytes(i);
> byte[] val = Bytes.toBytes(i);
> Put put = new Put(key);
> put.setWriteToWAL(false);
> put.add(bf, qualifier, 0, val);
> table.put(put);
> if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 + " M"); }
> }
> table.flushCommits();
> }
> public static void count(HTable table)throws IOException{
> Scan scan = new Scan();
> ResultScanner scanner = table.getScanner(scan);
> Result result = null;
> int i = 0;
> while( (result = scanner.next()) != null ){
> byte[] key = result.getRow();
> ++i;
> if(i % 10000 == 0)System.out.println(i);
> }
> System.out.println("TOTAL========== "+i);
> }
> public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
> if(!admin.tableExists(tableName))return;
> admin.disableTable(tableName);
> admin.deleteTable(tableName);
> }
>
> public static void main(String[] args)throws Exception{
> int k = 1000;
> boolean insert = true;
> if(args.length > 0){
> if("read".equals(args[0]))insert = false;
> else k = Integer.parseInt(args[0]);
> }
>
> HBaseConfiguration config = new HBaseConfiguration();
> String tableName = "TestTable";
> String familyName = "test_family";
> HBaseAdmin admin = new HBaseAdmin(config);
> removeTable(admin, tableName);
> HTable table = getTable(config, tableName, new String[]{familyName});
> if(insert)test_serial_insert(table, familyName, k*1000);
> count(table);
> }
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.