You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hbase.apache.org by "mingkeming (JIRA)" <ji...@apache.org> on 2010/02/17 11:21:27 UTC

[jira] Created: (HBASE-2232) Can not create insert more than half million rows

Can not create insert more than  half million rows
--------------------------------------------------

                 Key: HBASE-2232
                 URL: https://issues.apache.org/jira/browse/HBASE-2232
             Project: Hadoop HBase
          Issue Type: Bug
          Components: client, io, master, regionserver
    Affects Versions: 0.20.3
         Environment: Linux
            Reporter: mingkeming


The following code attempts to insert a certain number of rows into a table.
This is running with HBase 0.20.3 as downloaded, without changing the config.
It does not work if there are more than 500K or so rows.
I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family, where xxxx is a number.
But the data disappears once there are files of around 16MB in size.
I guess it is being compacted or moved somewhere? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.



import java.io.*;
import java.util.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class Test{

    /**
     * Opens a client handle to {@code tableName}, creating the table first if
     * it does not already exist.
     *
     * @param config         HBase client configuration
     * @param tableName      table to open (created on demand)
     * @param columnFamilies families to define when the table must be created
     * @return an {@link HTable} configured for buffered (client-side batched) writes
     * @throws IOException if the admin or table connection fails
     */
    public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies) throws IOException {
        createTable(new HBaseAdmin(config), tableName, columnFamilies);
        HTable table = new HTable(config, tableName);
        // Buffer puts on the client; they are sent when the buffer fills or
        // flushCommits() is called explicitly.
        table.setAutoFlush(false);
        table.setWriteBufferSize(12 * 1024 * 1024); // 12 MB write buffer
        return table;
    }

    /**
     * Creates {@code tableName} with the given column families, each keeping a
     * single version per cell.
     *
     * @param admin          HBase admin used to issue the DDL
     * @param tableName      name of the table to create
     * @param columnFamilies family names to add to the table descriptor
     * @return {@code true} if the table was created, {@code false} if it already existed
     * @throws IOException if the admin call fails
     */
    public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies) throws IOException {
        if (admin.tableExists(tableName)) return false;
        HTableDescriptor desc = new HTableDescriptor(tableName);
        for (String s : columnFamilies) {
            // Bytes.toBytes encodes as UTF-8; the original String.getBytes()
            // used the platform default charset, which is not portable.
            HColumnDescriptor col = new HColumnDescriptor(Bytes.toBytes(s));
            col.setMaxVersions(1);
            desc.addFamily(col);
        }
        admin.createTable(desc);
        return true;
    }
    
    /**
     * Serially inserts {@code count} rows into {@code table}; row key, qualifier
     * and value are all the 4-byte encoding of the loop index.
     *
     * @param table  destination table (auto-flush assumed off; flushed at the end)
     * @param family column family to write into
     * @param count  number of rows to insert
     * @throws IOException if a put or the final flush fails
     */
    public static void test_serial_insert(HTable table, String family, int count) throws IOException {
        byte[] bf = Bytes.toBytes(family);
        for (int i = 0; i < count; i++) {
            // Removed unused local `id` from the original.
            byte[] qualifier = Bytes.toBytes(i);
            byte[] key       = Bytes.toBytes(i);
            byte[] val       = Bytes.toBytes(i);
            Put put = new Put(key);
            // Skip the WAL for raw insert speed; data is lost on server crash.
            put.setWriteToWAL(false);
            // NOTE(review): explicit timestamp 0 — presumably intentional for
            // this benchmark; confirm, as 0 predates any existing cell versions.
            put.add(bf, qualifier, 0, val);
            table.put(put);
            // Progress marker every million rows.
            if ((i + 1) % 1000000 == 0) { System.out.println((i + 1) / 1000000 + "  M"); }
        }
        table.flushCommits(); // push any puts still sitting in the client buffer
    }

    /**
     * Scans the whole table and prints a running count every 10,000 rows,
     * followed by the total.
     *
     * @param table table to scan
     * @throws IOException if the scan fails
     */
    public static void count(HTable table) throws IOException {
        Scan scan = new Scan();
        ResultScanner scanner = table.getScanner(scan);
        int i = 0;
        try {
            // Removed unused local `key` from the original loop body.
            while (scanner.next() != null) {
                ++i;
                if (i % 10000 == 0) System.out.println(i);
            }
        } finally {
            // The original leaked the scanner; close it so the server-side
            // scanner lease is released even if next() throws.
            scanner.close();
        }
        System.out.println("TOTAL========== " + i);
    }

    /**
     * Drops {@code tableName} if it exists; a table must be disabled before it
     * can be deleted.
     *
     * @param admin     HBase admin used to issue the DDL
     * @param tableName table to disable and delete
     * @throws IOException if the admin calls fail
     */
    public static void removeTable(HBaseAdmin admin, String tableName) throws IOException {
        if (admin.tableExists(tableName)) {
            admin.disableTable(tableName);
            admin.deleteTable(tableName);
        }
    }
    
    /**
     * Entry point. {@code java Test N} inserts N*1000 rows then counts them;
     * {@code java Test read} only counts the existing rows.
     *
     * @param args optional: "read" for count-only mode, or a number of
     *             thousands of rows to insert (default 1000 = 1M rows)
     */
    public static void main(String[] args) throws Exception {
        int k = 1000;
        boolean insert = true;
        if (args.length > 0) {
            if ("read".equals(args[0])) insert = false;
            else k = Integer.parseInt(args[0]);
        }

        HBaseConfiguration config = new HBaseConfiguration();
        String tableName = "TestTable";
        String familyName = "test_family";
        // BUG FIX: the original unconditionally removed the table, so running
        // with "read" deleted all data and then counted an empty table. Only
        // drop and recreate when we are about to insert.
        if (insert) {
            removeTable(new HBaseAdmin(config), tableName);
        }
        HTable table = getTable(config, tableName, new String[]{familyName});
        if (insert) test_serial_insert(table, familyName, k * 1000);
        count(table);
    }


-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Commented: (HBASE-2232) Can not create insert more than half million rows

Posted by "mingkeming (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/HBASE-2232?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12837131#action_12837131 ] 

mingkeming commented on HBASE-2232:
-----------------------------------

Hi, Lars,
Thanks for the comment. 
I have downloaded the latest from SVN and it now seems to work ok.
Why should HBase NOT work on a local machine? That seems like a bug to me.
I am trying to use HBase on a single machine, basically using it as a giant hashmap.
Isn't that what HBase is intended for, i.e., a hashmap? Why should I need at least 2 machines to be able to use it? Is there any reason why it is acceptable for HBase to fail when used on 1 machine?

Thanks 

> Can not create insert more than  half million rows
> --------------------------------------------------
>
>                 Key: HBASE-2232
>                 URL: https://issues.apache.org/jira/browse/HBASE-2232
>             Project: Hadoop HBase
>          Issue Type: Bug
>          Components: client, io, master, regionserver
>    Affects Versions: 0.20.3
>         Environment: Linux
>            Reporter: mingkeming
>
> The following code, which attempts to insert certain number of rows into a table. 
> This is running with hbase 0.20.3 after downloading without changing config.
> Does not work if there is more than 500K or so rows .
> I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number. 
> But the data disappear once it get 3  files of size around 16MB.
> I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.
> To create 2Million rows , run it as  java  Test 2000
> To create 10 Millions rows, run it as java Test 10000
> import java.io.*;
> import java.util.*;
> import org.apache.hadoop.hbase.*;
> import org.apache.hadoop.hbase.io.BatchUpdate;
> import org.apache.hadoop.hbase.client.Get;
> import org.apache.hadoop.hbase.client.HBaseAdmin;
> import org.apache.hadoop.hbase.client.HTable;
> import org.apache.hadoop.hbase.client.Put;
> import org.apache.hadoop.hbase.client.Result;
> import org.apache.hadoop.hbase.client.ResultScanner;
> import org.apache.hadoop.hbase.client.Scan;
> import org.apache.hadoop.hbase.util.Bytes;
> public class Test{
>     public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
> 	HBaseAdmin admin  = new HBaseAdmin(config);	
> 	createTable(admin, tableName, columnFamilies);
> 	HTable table =  new HTable(config, tableName);	
> 	table.setAutoFlush(false);
> 	table.setWriteBufferSize(1024*1024*12);
> 	return table;
>     }
>     public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
> 	if(admin.tableExists(tableName))return false;
> 	HTableDescriptor desc  = new HTableDescriptor(tableName);
> 	for(String s : columnFamilies){
> 	    HColumnDescriptor col = new HColumnDescriptor(s.getBytes());
> 	    col.setMaxVersions(1);
> 	    desc.addFamily(col);
> 	}
> 	admin.createTable(desc);
> 	return true;
>     }
>     
>     public static void test_serial_insert(HTable table, String family, int count)throws IOException{ 
> 	byte[] bf = Bytes.toBytes(family);
> 	for(int i = 0; i < count; i++){
> 	    int  id          = i;
> 	    byte[] qualifier = Bytes.toBytes(i); // "i"
> 	    byte[] key       = Bytes.toBytes(i);
> 	    byte[] val       = Bytes.toBytes(i);
> 	    Put put = new Put(key);
> 	    put.setWriteToWAL(false); 
> 	    put.add(bf, qualifier, 0, val); 
> 	    table.put(put);
> 	    if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 +  "  M"); }
> 	}
> 	table.flushCommits();
>     }
>     public static void count(HTable table)throws IOException{
> 	Scan scan = new Scan();
> 	ResultScanner scanner = table.getScanner(scan);       
> 	Result result = null;
> 	int i = 0;
> 	while( (result = scanner.next()) != null  ){
> 	    byte[] key  = result.getRow();
> 	    ++i;
> 	    if(i % 10000 == 0)System.out.println(i);
> 	}
> 	System.out.println("TOTAL========== "+i);
>     }
>     public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
> 	if(!admin.tableExists(tableName))return;
> 	admin.disableTable(tableName);
> 	admin.deleteTable(tableName);
>     }
>     
>     public static void main(String[] args)throws Exception{
> 	int k = 1000;
> 	boolean insert = true;
> 	if(args.length > 0){
> 	    if("read".equals(args[0]))insert = false;
> 	    else k = Integer.parseInt(args[0]);
> 	}
> 	
> 	HBaseConfiguration config = new HBaseConfiguration();
> 	String tableName = "TestTable";
> 	String familyName = "test_family";
> 	HBaseAdmin admin  = new HBaseAdmin(config);
> 	removeTable(admin, tableName);
> 	HTable table = getTable(config, tableName, new String[]{familyName});
> 	if(insert)test_serial_insert(table, familyName, k*1000);
> 	count(table);
>     }

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Commented: (HBASE-2232) Can not create insert more than half million rows

Posted by "Lars George (JIRA)" <ji...@apache.org>.
    [ https://issues.apache.org/jira/browse/HBASE-2232?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12834912#action_12834912 ] 

Lars George commented on HBASE-2232:
------------------------------------

Hey, are you saying that you did this on a local install, i.e. running all on one machine, not on a cluster? If so than there is no wonder it does not work, as local is for prototyping only. Please give us a few more details on your setup.

> Can not create insert more than  half million rows
> --------------------------------------------------
>
>                 Key: HBASE-2232
>                 URL: https://issues.apache.org/jira/browse/HBASE-2232
>             Project: Hadoop HBase
>          Issue Type: Bug
>          Components: client, io, master, regionserver
>    Affects Versions: 0.20.3
>         Environment: Linux
>            Reporter: mingkeming
>
> The following code, which attempts to insert certain number of rows into a table. 
> This is running with hbase 0.20.3 after downloading without changing config.
> Does not work if there is more than 500K or so rows .
> I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number. 
> But the data disappear once it get 3  files of size around 16MB.
> I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.
> To create 2Million rows , run it as  java  Test 2000
> To create 10 Millions rows, run it as java Test 10000
> import java.io.*;
> import java.util.*;
> import org.apache.hadoop.hbase.*;
> import org.apache.hadoop.hbase.io.BatchUpdate;
> import org.apache.hadoop.hbase.client.Get;
> import org.apache.hadoop.hbase.client.HBaseAdmin;
> import org.apache.hadoop.hbase.client.HTable;
> import org.apache.hadoop.hbase.client.Put;
> import org.apache.hadoop.hbase.client.Result;
> import org.apache.hadoop.hbase.client.ResultScanner;
> import org.apache.hadoop.hbase.client.Scan;
> import org.apache.hadoop.hbase.util.Bytes;
> public class Test{
>     public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
> 	HBaseAdmin admin  = new HBaseAdmin(config);	
> 	createTable(admin, tableName, columnFamilies);
> 	HTable table =  new HTable(config, tableName);	
> 	table.setAutoFlush(false);
> 	table.setWriteBufferSize(1024*1024*12);
> 	return table;
>     }
>     public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
> 	if(admin.tableExists(tableName))return false;
> 	HTableDescriptor desc  = new HTableDescriptor(tableName);
> 	for(String s : columnFamilies){
> 	    HColumnDescriptor col = new HColumnDescriptor(s.getBytes());
> 	    col.setMaxVersions(1);
> 	    desc.addFamily(col);
> 	}
> 	admin.createTable(desc);
> 	return true;
>     }
>     
>     public static void test_serial_insert(HTable table, String family, int count)throws IOException{ 
> 	byte[] bf = Bytes.toBytes(family);
> 	for(int i = 0; i < count; i++){
> 	    int  id          = i;
> 	    byte[] qualifier = Bytes.toBytes(i); // "i"
> 	    byte[] key       = Bytes.toBytes(i);
> 	    byte[] val       = Bytes.toBytes(i);
> 	    Put put = new Put(key);
> 	    put.setWriteToWAL(false); 
> 	    put.add(bf, qualifier, 0, val); 
> 	    table.put(put);
> 	    if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 +  "  M"); }
> 	}
> 	table.flushCommits();
>     }
>     public static void count(HTable table)throws IOException{
> 	Scan scan = new Scan();
> 	ResultScanner scanner = table.getScanner(scan);       
> 	Result result = null;
> 	int i = 0;
> 	while( (result = scanner.next()) != null  ){
> 	    byte[] key  = result.getRow();
> 	    ++i;
> 	    if(i % 10000 == 0)System.out.println(i);
> 	}
> 	System.out.println("TOTAL========== "+i);
>     }
>     public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
> 	if(!admin.tableExists(tableName))return;
> 	admin.disableTable(tableName);
> 	admin.deleteTable(tableName);
>     }
>     
>     public static void main(String[] args)throws Exception{
> 	int k = 1000;
> 	boolean insert = true;
> 	if(args.length > 0){
> 	    if("read".equals(args[0]))insert = false;
> 	    else k = Integer.parseInt(args[0]);
> 	}
> 	
> 	HBaseConfiguration config = new HBaseConfiguration();
> 	String tableName = "TestTable";
> 	String familyName = "test_family";
> 	HBaseAdmin admin  = new HBaseAdmin(config);
> 	removeTable(admin, tableName);
> 	HTable table = getTable(config, tableName, new String[]{familyName});
> 	if(insert)test_serial_insert(table, familyName, k*1000);
> 	count(table);
>     }

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Resolved: (HBASE-2232) Can not create insert more than half million rows

Posted by "mingkeming (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/HBASE-2232?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

mingkeming resolved HBASE-2232.
-------------------------------

       Resolution: Cannot Reproduce
    Fix Version/s: 0.21.0

Can not reproduce the problem in the the latest SVN version.


> Can not create insert more than  half million rows
> --------------------------------------------------
>
>                 Key: HBASE-2232
>                 URL: https://issues.apache.org/jira/browse/HBASE-2232
>             Project: Hadoop HBase
>          Issue Type: Bug
>          Components: client, io, master, regionserver
>    Affects Versions: 0.20.3
>         Environment: Linux
>            Reporter: mingkeming
>             Fix For: 0.21.0
>
>
> The following code, which attempts to insert certain number of rows into a table. 
> This is running with hbase 0.20.3 after downloading without changing config.
> Does not work if there is more than 500K or so rows .
> I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number. 
> But the data disappear once it get 3  files of size around 16MB.
> I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.
> To create 2Million rows , run it as  java  Test 2000
> To create 10 Millions rows, run it as java Test 10000
> import java.io.*;
> import java.util.*;
> import org.apache.hadoop.hbase.*;
> import org.apache.hadoop.hbase.io.BatchUpdate;
> import org.apache.hadoop.hbase.client.Get;
> import org.apache.hadoop.hbase.client.HBaseAdmin;
> import org.apache.hadoop.hbase.client.HTable;
> import org.apache.hadoop.hbase.client.Put;
> import org.apache.hadoop.hbase.client.Result;
> import org.apache.hadoop.hbase.client.ResultScanner;
> import org.apache.hadoop.hbase.client.Scan;
> import org.apache.hadoop.hbase.util.Bytes;
> public class Test{
>     public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
> 	HBaseAdmin admin  = new HBaseAdmin(config);	
> 	createTable(admin, tableName, columnFamilies);
> 	HTable table =  new HTable(config, tableName);	
> 	table.setAutoFlush(false);
> 	table.setWriteBufferSize(1024*1024*12);
> 	return table;
>     }
>     public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
> 	if(admin.tableExists(tableName))return false;
> 	HTableDescriptor desc  = new HTableDescriptor(tableName);
> 	for(String s : columnFamilies){
> 	    HColumnDescriptor col = new HColumnDescriptor(s.getBytes());
> 	    col.setMaxVersions(1);
> 	    desc.addFamily(col);
> 	}
> 	admin.createTable(desc);
> 	return true;
>     }
>     
>     public static void test_serial_insert(HTable table, String family, int count)throws IOException{ 
> 	byte[] bf = Bytes.toBytes(family);
> 	for(int i = 0; i < count; i++){
> 	    int  id          = i;
> 	    byte[] qualifier = Bytes.toBytes(i); // "i"
> 	    byte[] key       = Bytes.toBytes(i);
> 	    byte[] val       = Bytes.toBytes(i);
> 	    Put put = new Put(key);
> 	    put.setWriteToWAL(false); 
> 	    put.add(bf, qualifier, 0, val); 
> 	    table.put(put);
> 	    if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 +  "  M"); }
> 	}
> 	table.flushCommits();
>     }
>     public static void count(HTable table)throws IOException{
> 	Scan scan = new Scan();
> 	ResultScanner scanner = table.getScanner(scan);       
> 	Result result = null;
> 	int i = 0;
> 	while( (result = scanner.next()) != null  ){
> 	    byte[] key  = result.getRow();
> 	    ++i;
> 	    if(i % 10000 == 0)System.out.println(i);
> 	}
> 	System.out.println("TOTAL========== "+i);
>     }
>     public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
> 	if(!admin.tableExists(tableName))return;
> 	admin.disableTable(tableName);
> 	admin.deleteTable(tableName);
>     }
>     
>     public static void main(String[] args)throws Exception{
> 	int k = 1000;
> 	boolean insert = true;
> 	if(args.length > 0){
> 	    if("read".equals(args[0]))insert = false;
> 	    else k = Integer.parseInt(args[0]);
> 	}
> 	
> 	HBaseConfiguration config = new HBaseConfiguration();
> 	String tableName = "TestTable";
> 	String familyName = "test_family";
> 	HBaseAdmin admin  = new HBaseAdmin(config);
> 	removeTable(admin, tableName);
> 	HTable table = getTable(config, tableName, new String[]{familyName});
> 	if(insert)test_serial_insert(table, familyName, k*1000);
> 	count(table);
>     }

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Updated: (HBASE-2232) Can not create insert more than half million rows

Posted by "mingkeming (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/HBASE-2232?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

mingkeming updated HBASE-2232:
------------------------------

    Description: 
The following code, which attempts to insert certain number of rows into a table. 
This is running with hbase 0.20.3 after downloading without changing config.
Does not work if there is more than 500K or so rows .
I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number. 
But the data disappear once it get 3  files of size around 16MB.
I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.



import java.io.*;
import java.util.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class Test{

    public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
	HBaseAdmin admin  = new HBaseAdmin(config);	
	createTable(admin, tableName, columnFamilies);
	HTable table =  new HTable(config, tableName);	
	table.setAutoFlush(false);
	table.setWriteBufferSize(1024*1024*12);
	return table;
    }

    public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
	if(admin.tableExists(tableName))return false;
	HTableDescriptor desc  = new HTableDescriptor(tableName);
	for(String s : columnFamilies){
	    HColumnDescriptor col = new HColumnDescriptor(s.getBytes());
	    col.setMaxVersions(1);
	    desc.addFamily(col);
	}
	admin.createTable(desc);
	return true;
    }
    
    public static void test_serial_insert(HTable table, String family, int count)throws IOException{ 
	byte[] bf = Bytes.toBytes(family);
	for(int i = 0; i < count; i++){
	    int  id          = i;
	    byte[] qualifier = Bytes.toBytes(i); // "i"
	    byte[] key       = Bytes.toBytes(i);
	    byte[] val       = Bytes.toBytes(i);
	    Put put = new Put(key);
	    put.setWriteToWAL(false); 
	    put.add(bf, qualifier, 0, val); 
	    table.put(put);
	    if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 +  "  M"); }
	}
	table.flushCommits();
    }

    public static void count(HTable table)throws IOException{
	Scan scan = new Scan();
	ResultScanner scanner = table.getScanner(scan);       
	Result result = null;
	int i = 0;
	while( (result = scanner.next()) != null  ){
	    byte[] key  = result.getRow();
	    ++i;
	    if(i % 10000 == 0)System.out.println(i);
	}
	System.out.println("TOTAL========== "+i);
    }

    public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
	if(!admin.tableExists(tableName))return;
	admin.disableTable(tableName);
	admin.deleteTable(tableName);
    }
    
    public static void main(String[] args)throws Exception{
	int k = 1000;
	boolean insert = true;
	if(args.length > 0){
	    if("read".equals(args[0]))insert = false;
	    else k = Integer.parseInt(args[0]);
	}
	
	HBaseConfiguration config = new HBaseConfiguration();
	String tableName = "TestTable";
	String familyName = "test_family";
	HBaseAdmin admin  = new HBaseAdmin(config);
	removeTable(admin, tableName);
	HTable table = getTable(config, tableName, new String[]{familyName});
	if(insert)test_serial_insert(table, familyName, k*1000);
	count(table);
    }


  was:
The following code, which attempts to insert certain number of rows into a table. 
This is running with hbase 0.20.3 after downloading without changing config.
Does not work if there is more than 500K or so rows .
I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number. 
But the data disappear once it get  files of size around 16MB.
I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.



import java.io.*;
import java.util.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class Test{

    public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
	HBaseAdmin admin  = new HBaseAdmin(config);	
	createTable(admin, tableName, columnFamilies);
	HTable table =  new HTable(config, tableName);	
	table.setAutoFlush(false);
	table.setWriteBufferSize(1024*1024*12);
	return table;
    }

    public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
	if(admin.tableExists(tableName))return false;
	HTableDescriptor desc  = new HTableDescriptor(tableName);
	for(String s : columnFamilies){
	    HColumnDescriptor col = new HColumnDescriptor(s.getBytes());
	    col.setMaxVersions(1);
	    desc.addFamily(col);
	}
	admin.createTable(desc);
	return true;
    }
    
    public static void test_serial_insert(HTable table, String family, int count)throws IOException{ 
	byte[] bf = Bytes.toBytes(family);
	for(int i = 0; i < count; i++){
	    int  id          = i;
	    byte[] qualifier = Bytes.toBytes(i); // "i"
	    byte[] key       = Bytes.toBytes(i);
	    byte[] val       = Bytes.toBytes(i);
	    Put put = new Put(key);
	    put.setWriteToWAL(false); 
	    put.add(bf, qualifier, 0, val); 
	    table.put(put);
	    if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 +  "  M"); }
	}
	table.flushCommits();
    }

    public static void count(HTable table)throws IOException{
	Scan scan = new Scan();
	ResultScanner scanner = table.getScanner(scan);       
	Result result = null;
	int i = 0;
	while( (result = scanner.next()) != null  ){
	    byte[] key  = result.getRow();
	    ++i;
	    if(i % 10000 == 0)System.out.println(i);
	}
	System.out.println("TOTAL========== "+i);
    }

    public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
	if(!admin.tableExists(tableName))return;
	admin.disableTable(tableName);
	admin.deleteTable(tableName);
    }
    
    public static void main(String[] args)throws Exception{
	int k = 1000;
	boolean insert = true;
	if(args.length > 0){
	    if("read".equals(args[0]))insert = false;
	    else k = Integer.parseInt(args[0]);
	}
	
	HBaseConfiguration config = new HBaseConfiguration();
	String tableName = "TestTable";
	String familyName = "test_family";
	HBaseAdmin admin  = new HBaseAdmin(config);
	removeTable(admin, tableName);
	HTable table = getTable(config, tableName, new String[]{familyName});
	if(insert)test_serial_insert(table, familyName, k*1000);
	count(table);
    }



> Can not create insert more than  half million rows
> --------------------------------------------------
>
>                 Key: HBASE-2232
>                 URL: https://issues.apache.org/jira/browse/HBASE-2232
>             Project: Hadoop HBase
>          Issue Type: Bug
>          Components: client, io, master, regionserver
>    Affects Versions: 0.20.3
>         Environment: Linux
>            Reporter: mingkeming
>
> The following code, which attempts to insert certain number of rows into a table. 
> This is running with hbase 0.20.3 after downloading without changing config.
> Does not work if there is more than 500K or so rows .
> I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number. 
> But the data disappear once it get 3  files of size around 16MB.
> I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.
> import java.io.*;
> import java.util.*;
> import org.apache.hadoop.hbase.*;
> import org.apache.hadoop.hbase.io.BatchUpdate;
> import org.apache.hadoop.hbase.client.Get;
> import org.apache.hadoop.hbase.client.HBaseAdmin;
> import org.apache.hadoop.hbase.client.HTable;
> import org.apache.hadoop.hbase.client.Put;
> import org.apache.hadoop.hbase.client.Result;
> import org.apache.hadoop.hbase.client.ResultScanner;
> import org.apache.hadoop.hbase.client.Scan;
> import org.apache.hadoop.hbase.util.Bytes;
> public class Test{
>     public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
> 	HBaseAdmin admin  = new HBaseAdmin(config);	
> 	createTable(admin, tableName, columnFamilies);
> 	HTable table =  new HTable(config, tableName);	
> 	table.setAutoFlush(false);
> 	table.setWriteBufferSize(1024*1024*12);
> 	return table;
>     }
>     public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
> 	if(admin.tableExists(tableName))return false;
> 	HTableDescriptor desc  = new HTableDescriptor(tableName);
> 	for(String s : columnFamilies){
> 	    HColumnDescriptor col = new HColumnDescriptor(s.getBytes());
> 	    col.setMaxVersions(1);
> 	    desc.addFamily(col);
> 	}
> 	admin.createTable(desc);
> 	return true;
>     }
>     
>     public static void test_serial_insert(HTable table, String family, int count)throws IOException{ 
> 	byte[] bf = Bytes.toBytes(family);
> 	for(int i = 0; i < count; i++){
> 	    int  id          = i;
> 	    byte[] qualifier = Bytes.toBytes(i); // "i"
> 	    byte[] key       = Bytes.toBytes(i);
> 	    byte[] val       = Bytes.toBytes(i);
> 	    Put put = new Put(key);
> 	    put.setWriteToWAL(false); 
> 	    put.add(bf, qualifier, 0, val); 
> 	    table.put(put);
> 	    if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 +  "  M"); }
> 	}
> 	table.flushCommits();
>     }
>     public static void count(HTable table)throws IOException{
> 	Scan scan = new Scan();
> 	ResultScanner scanner = table.getScanner(scan);       
> 	Result result = null;
> 	int i = 0;
> 	while( (result = scanner.next()) != null  ){
> 	    byte[] key  = result.getRow();
> 	    ++i;
> 	    if(i % 10000 == 0)System.out.println(i);
> 	}
> 	System.out.println("TOTAL========== "+i);
>     }
>     public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
> 	if(!admin.tableExists(tableName))return;
> 	admin.disableTable(tableName);
> 	admin.deleteTable(tableName);
>     }
>     
>     public static void main(String[] args)throws Exception{
> 	int k = 1000;
> 	boolean insert = true;
> 	if(args.length > 0){
> 	    if("read".equals(args[0]))insert = false;
> 	    else k = Integer.parseInt(args[0]);
> 	}
> 	
> 	HBaseConfiguration config = new HBaseConfiguration();
> 	String tableName = "TestTable";
> 	String familyName = "test_family";
> 	HBaseAdmin admin  = new HBaseAdmin(config);
> 	removeTable(admin, tableName);
> 	HTable table = getTable(config, tableName, new String[]{familyName});
> 	if(insert)test_serial_insert(table, familyName, k*1000);
> 	count(table);
>     }

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.


[jira] Updated: (HBASE-2232) Can not create insert more than half million rows

Posted by "mingkeming (JIRA)" <ji...@apache.org>.
     [ https://issues.apache.org/jira/browse/HBASE-2232?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

mingkeming updated HBASE-2232:
------------------------------

    Description: 
The following code, which attempts to insert certain number of rows into a table. 
This is running with hbase 0.20.3 after downloading without changing config.
Does not work if there is more than 500K or so rows .
I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number. 
But the data disappear once it get 3  files of size around 16MB.
I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.

To create 2Million rows , run it as  java  Test 2000
To create 10 Millions rows, run it as java Test 10000


import java.io.*;
import java.util.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class Test{

    /**
     * Ensures {@code tableName} exists (creating it with the given column
     * families when absent) and returns a client handle configured for
     * buffered writes: auto-flush is disabled and the client-side write
     * buffer is set to 12 MB, so puts accumulate until the buffer fills or
     * flushCommits() is called.
     */
    public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
	HBaseAdmin admin  = new HBaseAdmin(config);	
	createTable(admin, tableName, columnFamilies);
	HTable table =  new HTable(config, tableName);	
	// Buffer puts on the client; 12 MB buffer instead of the default.
	table.setAutoFlush(false);
	table.setWriteBufferSize(1024*1024*12);
	return table;
    }

    /**
     * Creates {@code tableName} with one column family per entry of
     * {@code columnFamilies}, each limited to a single version. Does
     * nothing when the table already exists.
     *
     * @return {@code true} when the table was created, {@code false} when
     *         it already existed
     * @throws IOException if the admin operation fails
     */
    public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
        if(admin.tableExists(tableName)){
            return false;
        }
        HTableDescriptor descriptor = new HTableDescriptor(tableName);
        for(int idx = 0; idx < columnFamilies.length; idx++){
            HColumnDescriptor family = new HColumnDescriptor(columnFamilies[idx].getBytes());
            family.setMaxVersions(1);
            descriptor.addFamily(family);
        }
        admin.createTable(descriptor);
        return true;
    }
    
    /**
     * Serially inserts {@code count} rows into {@code table}; the row key,
     * column qualifier, and value are each the 4-byte encoding of the loop
     * index. WAL writes are disabled per-put, and puts are buffered on the
     * client (auto-flush is assumed off), so flushCommits() at the end
     * pushes the remaining tail. Prints a progress line every million rows.
     *
     * @param table  target table, expected to have auto-flush disabled
     * @param family column family to write into
     * @param count  number of rows to insert
     * @throws IOException on any client/server communication failure
     */
    public static void test_serial_insert(HTable table, String family, int count)throws IOException{ 
	byte[] bf = Bytes.toBytes(family);
	for(int i = 0; i < count; i++){
	    // Fix: removed the unused local "int id = i;" (dead code).
	    byte[] qualifier = Bytes.toBytes(i);
	    byte[] key       = Bytes.toBytes(i);
	    byte[] val       = Bytes.toBytes(i);
	    Put put = new Put(key);
	    put.setWriteToWAL(false); // skip WAL for speed; data is lost on region server crash
	    put.add(bf, qualifier, 0, val); // explicit timestamp 0
	    table.put(put);
	    if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 +  "  M"); }
	}
	table.flushCommits(); // push any puts still buffered on the client
    }

    /**
     * Scans the whole table, printing a running row count every 10000 rows
     * and the final total at the end.
     *
     * @param table table to scan
     * @throws IOException if the scan fails
     */
    public static void count(HTable table)throws IOException{
	Scan scan = new Scan();
	ResultScanner scanner = table.getScanner(scan);
	try{
	    // Fix: removed the unused local "byte[] key = result.getRow();".
	    Result result = null;
	    int i = 0;
	    while( (result = scanner.next()) != null  ){
		++i;
		if(i % 10000 == 0)System.out.println(i);
	    }
	    System.out.println("TOTAL========== "+i);
	}finally{
	    // Fix: the scanner was never closed, leaking the server-side
	    // scanner lease; always release it.
	    scanner.close();
	}
    }

    /**
     * Disables and then deletes {@code tableName}; a no-op when the table
     * does not exist.
     *
     * @throws IOException if the admin operation fails
     */
    public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
        if(admin.tableExists(tableName)){
            admin.disableTable(tableName);
            admin.deleteTable(tableName);
        }
    }
    
    /**
     * Entry point. With no arguments, inserts 1,000,000 rows and then
     * counts them. "java Test &lt;k&gt;" inserts k*1000 rows; "java Test read"
     * skips the insert and only counts. The table is dropped and recreated
     * on every run before inserting.
     */
    public static void main(String[] args)throws Exception{
        boolean insert = true;
        int k = 1000;
        if(args.length > 0){
            if("read".equals(args[0])){
                insert = false;
            }else{
                k = Integer.parseInt(args[0]);
            }
        }

        HBaseConfiguration config = new HBaseConfiguration();
        String tableName = "TestTable";
        String familyName = "test_family";
        removeTable(new HBaseAdmin(config), tableName);
        HTable table = getTable(config, tableName, new String[]{familyName});
        if(insert){
            test_serial_insert(table, familyName, k*1000);
        }
        count(table);
    }


  was:
The following code, which attempts to insert certain number of rows into a table. 
This is running with hbase 0.20.3 after downloading without changing config.
Does not work if there is more than 500K or so rows .
I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number. 
But the data disappear once it get 3  files of size around 16MB.
I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.



import java.io.*;
import java.util.*;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class Test{

    public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
	HBaseAdmin admin  = new HBaseAdmin(config);	
	createTable(admin, tableName, columnFamilies);
	HTable table =  new HTable(config, tableName);	
	table.setAutoFlush(false);
	table.setWriteBufferSize(1024*1024*12);
	return table;
    }

    public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
	if(admin.tableExists(tableName))return false;
	HTableDescriptor desc  = new HTableDescriptor(tableName);
	for(String s : columnFamilies){
	    HColumnDescriptor col = new HColumnDescriptor(s.getBytes());
	    col.setMaxVersions(1);
	    desc.addFamily(col);
	}
	admin.createTable(desc);
	return true;
    }
    
    public static void test_serial_insert(HTable table, String family, int count)throws IOException{ 
	byte[] bf = Bytes.toBytes(family);
	for(int i = 0; i < count; i++){
	    int  id          = i;
	    byte[] qualifier = Bytes.toBytes(i); // "i"
	    byte[] key       = Bytes.toBytes(i);
	    byte[] val       = Bytes.toBytes(i);
	    Put put = new Put(key);
	    put.setWriteToWAL(false); 
	    put.add(bf, qualifier, 0, val); 
	    table.put(put);
	    if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 +  "  M"); }
	}
	table.flushCommits();
    }

    public static void count(HTable table)throws IOException{
	Scan scan = new Scan();
	ResultScanner scanner = table.getScanner(scan);       
	Result result = null;
	int i = 0;
	while( (result = scanner.next()) != null  ){
	    byte[] key  = result.getRow();
	    ++i;
	    if(i % 10000 == 0)System.out.println(i);
	}
	System.out.println("TOTAL========== "+i);
    }

    public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
	if(!admin.tableExists(tableName))return;
	admin.disableTable(tableName);
	admin.deleteTable(tableName);
    }
    
    public static void main(String[] args)throws Exception{
	int k = 1000;
	boolean insert = true;
	if(args.length > 0){
	    if("read".equals(args[0]))insert = false;
	    else k = Integer.parseInt(args[0]);
	}
	
	HBaseConfiguration config = new HBaseConfiguration();
	String tableName = "TestTable";
	String familyName = "test_family";
	HBaseAdmin admin  = new HBaseAdmin(config);
	removeTable(admin, tableName);
	HTable table = getTable(config, tableName, new String[]{familyName});
	if(insert)test_serial_insert(table, familyName, k*1000);
	count(table);
    }



> Can not create insert more than  half million rows
> --------------------------------------------------
>
>                 Key: HBASE-2232
>                 URL: https://issues.apache.org/jira/browse/HBASE-2232
>             Project: Hadoop HBase
>          Issue Type: Bug
>          Components: client, io, master, regionserver
>    Affects Versions: 0.20.3
>         Environment: Linux
>            Reporter: mingkeming
>
> The following code, which attempts to insert certain number of rows into a table. 
> This is running with hbase 0.20.3 after downloading without changing config.
> Does not work if there is more than 500K or so rows .
> I can see data being created in $HBASE_HOME/TestTable/xxxx/test_family where xxxx is a number. 
> But the data disappear once it get 3  files of size around 16MB.
> I guess it is being compacted or moved to somewhere ? But I see nothing in $HBASE_HOME/TestTable/compaction.dir.
> To create 2Million rows , run it as  java  Test 2000
> To create 10 Millions rows, run it as java Test 10000
> import java.io.*;
> import java.util.*;
> import org.apache.hadoop.hbase.*;
> import org.apache.hadoop.hbase.io.BatchUpdate;
> import org.apache.hadoop.hbase.client.Get;
> import org.apache.hadoop.hbase.client.HBaseAdmin;
> import org.apache.hadoop.hbase.client.HTable;
> import org.apache.hadoop.hbase.client.Put;
> import org.apache.hadoop.hbase.client.Result;
> import org.apache.hadoop.hbase.client.ResultScanner;
> import org.apache.hadoop.hbase.client.Scan;
> import org.apache.hadoop.hbase.util.Bytes;
> public class Test{
>     public static HTable getTable(HBaseConfiguration config, String tableName, String[] columnFamilies)throws IOException{
> 	HBaseAdmin admin  = new HBaseAdmin(config);	
> 	createTable(admin, tableName, columnFamilies);
> 	HTable table =  new HTable(config, tableName);	
> 	table.setAutoFlush(false);
> 	table.setWriteBufferSize(1024*1024*12);
> 	return table;
>     }
>     public static boolean createTable(HBaseAdmin admin, String tableName, String[] columnFamilies)throws IOException{
> 	if(admin.tableExists(tableName))return false;
> 	HTableDescriptor desc  = new HTableDescriptor(tableName);
> 	for(String s : columnFamilies){
> 	    HColumnDescriptor col = new HColumnDescriptor(s.getBytes());
> 	    col.setMaxVersions(1);
> 	    desc.addFamily(col);
> 	}
> 	admin.createTable(desc);
> 	return true;
>     }
>     
>     public static void test_serial_insert(HTable table, String family, int count)throws IOException{ 
> 	byte[] bf = Bytes.toBytes(family);
> 	for(int i = 0; i < count; i++){
> 	    int  id          = i;
> 	    byte[] qualifier = Bytes.toBytes(i); // "i"
> 	    byte[] key       = Bytes.toBytes(i);
> 	    byte[] val       = Bytes.toBytes(i);
> 	    Put put = new Put(key);
> 	    put.setWriteToWAL(false); 
> 	    put.add(bf, qualifier, 0, val); 
> 	    table.put(put);
> 	    if( (i+1) % 1000000 == 0){System.out.println( (i+1)/1000000 +  "  M"); }
> 	}
> 	table.flushCommits();
>     }
>     public static void count(HTable table)throws IOException{
> 	Scan scan = new Scan();
> 	ResultScanner scanner = table.getScanner(scan);       
> 	Result result = null;
> 	int i = 0;
> 	while( (result = scanner.next()) != null  ){
> 	    byte[] key  = result.getRow();
> 	    ++i;
> 	    if(i % 10000 == 0)System.out.println(i);
> 	}
> 	System.out.println("TOTAL========== "+i);
>     }
>     public static void removeTable(HBaseAdmin admin, String tableName)throws IOException{
> 	if(!admin.tableExists(tableName))return;
> 	admin.disableTable(tableName);
> 	admin.deleteTable(tableName);
>     }
>     
>     public static void main(String[] args)throws Exception{
> 	int k = 1000;
> 	boolean insert = true;
> 	if(args.length > 0){
> 	    if("read".equals(args[0]))insert = false;
> 	    else k = Integer.parseInt(args[0]);
> 	}
> 	
> 	HBaseConfiguration config = new HBaseConfiguration();
> 	String tableName = "TestTable";
> 	String familyName = "test_family";
> 	HBaseAdmin admin  = new HBaseAdmin(config);
> 	removeTable(admin, tableName);
> 	HTable table = getTable(config, tableName, new String[]{familyName});
> 	if(insert)test_serial_insert(table, familyName, k*1000);
> 	count(table);
>     }

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.