You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@carbondata.apache.org by "Ajantha Bhat (Jira)" <ji...@apache.org> on 2021/01/05 17:32:00 UTC

[jira] [Resolved] (CARBONDATA-3987) Issues in SDK Pagination reader (2 issues)

     [ https://issues.apache.org/jira/browse/CARBONDATA-3987?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Ajantha Bhat resolved CARBONDATA-3987.
--------------------------------------
    Fix Version/s: 2.2.0
       Resolution: Fixed

> Issues in SDK Pagination reader (2 issues)
> ------------------------------------------
>
>                 Key: CARBONDATA-3987
>                 URL: https://issues.apache.org/jira/browse/CARBONDATA-3987
>             Project: CarbonData
>          Issue Type: Bug
>          Components: other
>    Affects Versions: 2.1.0
>            Reporter: Chetan Bhat
>            Priority: Minor
>             Fix For: 2.2.0
>
>          Time Spent: 6.5h
>  Remaining Estimate: 0h
>
> Issue 1 : 
> Write data to a table and then insert one more row. An error is thrown when trying to read the newly added row, whereas getTotalRows() is incremented by 1.
> Test code-
> /**
>  * Carbon Files are written using CarbonWriter in outputpath
>  *
>  * Carbon Files are read using paginationCarbonReader object
>  * Checking pagination with insert on large data with 8 split
>  */
>  @Test
>  public void testSDKPaginationInsertData() throws IOException, InvalidLoadOptionException, InterruptedException {
>  System.out.println("___________________________________________" + name.getMethodName() + " TestCase Execution is started________________________________________________");
> //
> // String outputPath1 = getOutputPath(outputDir, name.getMethodName() + "large");
> //
> // long uid = 123456;
> // TimeZone.setDefault(TimeZone.getTimeZone("Asia/Shanghai"));
> // writeMultipleCarbonFiles("id int,name string,rank short,salary double,active boolean,dob date,doj timestamp,city string,dept string", getDatas(), outputPath1, uid, null, null);
> //
> // System.out.println("Data is written");
> List<String[]> data1 = new ArrayList<String[]>();
>  String[] row1 = \{"1", "AAA", "3", "3444345.66", "true", "1979-12-09", "2011-2-10 1:00:20", "Pune", "IT"};
>  String[] row2 = \{"2", "BBB", "2", "543124.66", "false", "1987-2-19", "2017-1-1 12:00:20", "Bangalore", "DATA"};
>  String[] row3 = \{"3", "CCC", "1", "787878.888", "false", "1982-05-12", "2015-12-1 2:20:20", "Pune", "DATA"};
>  String[] row4 = \{"4", "DDD", "1", "99999.24", "true", "1981-04-09", "2000-1-15 7:00:20", "Delhi", "MAINS"};
>  String[] row5 = \{"5", "EEE", "3", "545656.99", "true", "1987-12-09", "2017-11-25 04:00:20", "Delhi", "IT"};
> data1.add(row1);
>  data1.add(row2);
>  data1.add(row3);
>  data1.add(row4);
>  data1.add(row5);
> String outputPath1 = getOutputPath(outputDir, name.getMethodName() + "large");
> long uid = 123456;
>  TimeZone.setDefault(TimeZone.getTimeZone("Asia/Shanghai"));
>  writeMultipleCarbonFiles("id int,name string,rank short,salary double,active boolean,dob date,doj timestamp,city string,dept string", data1, outputPath1, uid, null, null);
> System.out.println("Data is written");
> String hdfsPath1 = moveFiles(outputPath1, outputPath1);
>  String datapath1 = hdfsPath1.concat("/" + name.getMethodName() + "large");
>  System.out.println("HDFS Data Path is: " + datapath1);
> runSQL("create table " + name.getMethodName() + "large" + " using carbon location '" + datapath1 + "'");
>  System.out.println("Table " + name.getMethodName() + " is created Successfully");
>  runSQL("select count(*) from " + name.getMethodName() + "large");
>  long uid1 = 123;
>  String outputPath = getOutputPath(outputDir, name.getMethodName());
>  List<String[]> data = new ArrayList<String[]>();
>  String[] row = \{"222", "Daisy", "3", "334.456", "true", "1956-11-08", "2013-12-10 12:00:20", "Pune", "IT"};
>  data.add(row);
>  writeData("id int,name string,rank short,salary double,active boolean,dob date,doj timestamp,city string,dept string", data, outputPath, uid, null, null);
>  String hdfsPath = moveFiles(outputPath, outputPath);
>  String datapath = hdfsPath.concat("/" + name.getMethodName());
> runSQL("create table " + name.getMethodName() + " using carbon location '" + datapath + "'");
>  runSQL("select count(*) from " + name.getMethodName());
>  System.out.println("----Insert------");
>  runSQL("insert into table " + name.getMethodName() + " select * from " + name.getMethodName() + "large");
>  System.out.println("Inserted");
>  System.out.println("----------After Insert--------------");
>  System.out.println("----Query 1----");
>  runSQL("select count(*) from " + name.getMethodName());
>  // configure cache size = 4 blocklet
>  CarbonProperties.getInstance()
>  .addProperty(CarbonCommonConstants.CARBON_MAX_PAGINATION_LRU_CACHE_SIZE_IN_MB, "4");
> CarbonReaderBuilder carbonReaderBuilder = CarbonReader.builder(datapath, "_temp").withPaginationSupport().projection(new String[]\{"id","name","rank","salary","active","dob","doj","city","dept"});
>  PaginationCarbonReader<Object> paginationCarbonReader =
>  (PaginationCarbonReader<Object>) carbonReaderBuilder.build();
>  File[] dataFiles1 = new File(datapath).listFiles(new FilenameFilter() {
>  @Override public boolean accept(File dir, String name) {
>  return name.endsWith("carbondata");
>  }
>  });
>  String version=CarbonSchemaReader.getVersionDetails(dataFiles1[0].getAbsolutePath());
>  System.out.println("version "+version);
> System.out.println("Total no of rows is : "+paginationCarbonReader.getTotalRows() );
>  assertTrue(paginationCarbonReader.getTotalRows() == 6);
> Object[] rows=paginationCarbonReader.read(1,6);
>  //assertTrue(rows.length==5);
>  for (Object rowss : rows) {
>  System.out.println(((Object[]) rowss)[0]);
>  // assertTrue (((Object[]) row)[1].equals(5001));
>  }
> // close the reader
>  paginationCarbonReader.close();
> }
>  
> Issue 2 : When filter() is used to filter certain rows, getTotalRows() still shows the previous (unfiltered) total number of rows, whereas trying to read all of those rows results in an error.
> /**
>  * Carbon Files are written using CarbonWriter in outputpath
>  *
>  * Carbon Files are read using paginationCarbonReader object with filter
>  */
>  @Test
>  public void testSDKPaginationFilter() throws IOException, InvalidLoadOptionException, InterruptedException {
>  System.out.println("___________________________________________" + name.getMethodName() + " TestCase Execution is started________________________________________________");
> List<String []> data =new ArrayList<String []>();
>  String [] row1= \{"100","MNO","A","1001"};
>  String [] row2= \{"100","MNOP","C","3001"};
>  String [] row3= \{"100","MNOQ","X","2001"};
>  String [] row4= \{"100","MNOR","Z","7001"};
>  String [] row5= \{"100","MNOS","P","5001"};
>  data.add(row1);
>  data.add(row2);
>  data.add(row3);
>  data.add(row4);
>  data.add(row5);
> String outputPath=getOutputPath(outputDir,name.getMethodName());
>  boolean isTransactionalTable=false;
>  long uid=System.currentTimeMillis();
>  String blockletsize= String.valueOf(2);
>  String blocksize= String.valueOf(4);
>  String [] sortColumns=\{"c4","c3"};
>  writeData("c1 int,c2 string,c3 string,c4 int",data,outputPath, uid,blocksize,blockletsize,sortColumns);
>  System.out.println("Data is written");
> String hdfsPath = moveFiles(outputPath, outputPath);
>  String dataPath = hdfsPath.concat("/" + name.getMethodName());
>  System.out.println("HDFS Data Path is: " + dataPath);
> // configure cache size = 4 blocklet
>  CarbonProperties.getInstance()
>  .addProperty(CarbonCommonConstants.CARBON_MAX_PAGINATION_LRU_CACHE_SIZE_IN_MB, "4");
> //filter expression
>  EqualToExpression equalExpression =
>  new EqualToExpression(new ColumnExpression("c3", DataTypes.STRING),
>  new LiteralExpression("P", DataTypes.STRING));
> CarbonReaderBuilder carbonReaderBuilder = CarbonReader.builder(dataPath, "_temp").withPaginationSupport().projection(new String[]\{"c2", "c4"}).filter(equalExpression);
>  PaginationCarbonReader<Object> paginationCarbonReader =
>  (PaginationCarbonReader<Object>) carbonReaderBuilder.build();
>  File[] dataFiles1 = new File(dataPath).listFiles(new FilenameFilter() {
>  @Override public boolean accept(File dir, String name) {
>  return name.endsWith("carbondata");
>  }
>  });
>  String version=CarbonSchemaReader.getVersionDetails(dataFiles1[0].getAbsolutePath());
>  System.out.println("version "+version);
> System.out.println("Total no of rows is : "+paginationCarbonReader.getTotalRows() );
>  assertTrue(paginationCarbonReader.getTotalRows() == 5);
> Object[] rows=paginationCarbonReader.read(1,5);
>  for (Object row : rows) {
>  System.out.println(((Object[]) row)[0]);
>  // assertTrue (((Object[]) row)[1].equals(5001));
>  }
>  // close the reader
>  paginationCarbonReader.close();
>  }
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)