You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Andy Grove (JIRA)" <ji...@apache.org> on 2016/01/20 21:41:39 UTC
[jira] [Updated] (SPARK-12932) Cannot convert list of simple Java objects to Dataset (no encoder found)

     [ https://issues.apache.org/jira/browse/SPARK-12932?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Andy Grove updated SPARK-12932:
-------------------------------
    Description: 
When trying to create a Dataset from a List of Person objects (all using the Java API), I get the error "java.lang.UnsupportedOperationException: no encoder found for example_java.dataset.Person".

{code:title=Example.java}
/**
 * Minimal reproduction for the reported problem: builds a one-element list of
 * {@code Person} and asks the {@code SQLContext} to create a {@code Dataset}
 * from it using a bean encoder.
 *
 * NOTE: the snippet is truncated as posted; the closing braces of
 * {@code main} and of the class are not shown.
 */
public class JavaDatasetExample {

    public static void main(String[] args) throws Exception {

        // Set the application name and the master for this run.
        SparkConf sparkConf = new SparkConf()
                .setAppName("Example")
                .setMaster("local[*]");

        // Java-facing Spark context built from the configuration above.
        JavaSparkContext sc = new JavaSparkContext(sparkConf);

        // SQL entry point layered on top of the Spark context.
        SQLContext sqlContext = new SQLContext(sc);

        // Single sample record used as the input for the Dataset.
        List<Person> people = ImmutableList.of(
                new Person("Joe", "Bloggs", 21, "NY")
        );

        // Presumably the call that raises the reported "no encoder found" error;
        // the report does not include a stack trace, so confirm against a real run.
        Dataset<Person> dataset = sqlContext.createDataset(people, Encoders.bean(Person.class));

{code}

{code:title=Person.java}
/**
 * Serializable data holder describing a person; it is the element type of the
 * Dataset built in the accompanying example.
 *
 * <p>The fields are package-private and are exposed through getters only; no
 * setters are defined.
 *
 * <p>NOTE(review): the class is package-private and has no setters. Confirm
 * whether {@code Encoders.bean} needs a public, fully bean-compliant class;
 * nothing in this snippet establishes that either way.
 */
class Person implements Serializable {

    String first;
    String last;
    int age;
    String state;

    /** Creates an instance whose fields keep their Java default values. */
    public Person() {
        // Intentionally empty.
    }

    /**
     * Creates a fully populated instance.
     *
     * @param first value stored in {@code first}
     * @param last  value stored in {@code last}
     * @param age   value stored in {@code age}
     * @param state value stored in {@code state}
     */
    public Person(String first, String last, int age, String state) {
        this.state = state;
        this.age = age;
        this.last = last;
        this.first = first;
    }

    /** @return the value of {@code first} */
    public String getFirst() {
        return this.first;
    }

    /** @return the value of {@code last} */
    public String getLast() {
        return this.last;
    }

    /** @return the value of {@code age} */
    public int getAge() {
        return this.age;
    }

    /** @return the value of {@code state} */
    public String getState() {
        return this.state;
    }

}
{code}


  was:
When trying to create a Dataset from an RDD of Person (all using the Java API), I get the error "java.lang.UnsupportedOperationException: no encoder found for example_java.dataset.Person".

{code:title=Example.java}
/**
 * Minimal reproduction for the reported problem: builds a one-element list of
 * {@code Person} and asks the {@code SQLContext} to create a {@code Dataset}
 * from it using a bean encoder.
 *
 * NOTE: the snippet is truncated as posted; the closing braces of
 * {@code main} and of the class are not shown.
 */
public class JavaDatasetExample {

    public static void main(String[] args) throws Exception {

        // Set the application name and the master for this run.
        SparkConf sparkConf = new SparkConf()
                .setAppName("Example")
                .setMaster("local[*]");

        // Java-facing Spark context built from the configuration above.
        JavaSparkContext sc = new JavaSparkContext(sparkConf);

        // SQL entry point layered on top of the Spark context.
        SQLContext sqlContext = new SQLContext(sc);

        // Single sample record used as the input for the Dataset.
        List<Person> people = ImmutableList.of(
                new Person("Joe", "Bloggs", 21, "NY")
        );

        // Presumably the call that raises the reported "no encoder found" error;
        // the report does not include a stack trace, so confirm against a real run.
        Dataset<Person> dataset = sqlContext.createDataset(people, Encoders.bean(Person.class));

{code}

{code:Person.java}
/**
 * Serializable data holder describing a person; it is the element type of the
 * Dataset built in the accompanying example.
 *
 * <p>The fields are package-private and are exposed through getters only; no
 * setters are defined.
 *
 * <p>NOTE(review): the class is package-private and has no setters. Confirm
 * whether {@code Encoders.bean} needs a public, fully bean-compliant class;
 * nothing in this snippet establishes that either way.
 */
class Person implements Serializable {

    String first;
    String last;
    int age;
    String state;

    /** Creates an instance whose fields keep their Java default values. */
    public Person() {
        // Intentionally empty.
    }

    /**
     * Creates a fully populated instance.
     *
     * @param first value stored in {@code first}
     * @param last  value stored in {@code last}
     * @param age   value stored in {@code age}
     * @param state value stored in {@code state}
     */
    public Person(String first, String last, int age, String state) {
        this.state = state;
        this.age = age;
        this.last = last;
        this.first = first;
    }

    /** @return the value of {@code first} */
    public String getFirst() {
        return this.first;
    }

    /** @return the value of {@code last} */
    public String getLast() {
        return this.last;
    }

    /** @return the value of {@code age} */
    public int getAge() {
        return this.age;
    }

    /** @return the value of {@code state} */
    public String getState() {
        return this.state;
    }

}
{code}



> Cannot convert list of simple Java objects to Dataset (no encoder found)
> ------------------------------------------------------------------------
>
>                 Key: SPARK-12932
>                 URL: https://issues.apache.org/jira/browse/SPARK-12932
>             Project: Spark
>          Issue Type: Bug
>          Components: Java API
>    Affects Versions: 1.6.0
>         Environment: Ubuntu 15.10 / Java 8
>            Reporter: Andy Grove
>
> When trying to create a Dataset from a List of Person objects (all using the Java API), I get the error "java.lang.UnsupportedOperationException: no encoder found for example_java.dataset.Person".
> {code:title=Example.java}
> public class JavaDatasetExample {
>     public static void main(String[] args) throws Exception {
>         SparkConf sparkConf = new SparkConf()
>                 .setAppName("Example")
>                 .setMaster("local[*]");
>         JavaSparkContext sc = new JavaSparkContext(sparkConf);
>         SQLContext sqlContext = new SQLContext(sc);
>         List<Person> people = ImmutableList.of(
>                 new Person("Joe", "Bloggs", 21, "NY")
>         );
>         Dataset<Person> dataset = sqlContext.createDataset(people, Encoders.bean(Person.class));
> {code}
> {code:title=Person.java}
> class Person implements Serializable {
>     String first;
>     String last;
>     int age;
>     String state;
>     public Person() {
>     }
>     public Person(String first, String last, int age, String state) {
>         this.first = first;
>         this.last = last;
>         this.age = age;
>         this.state = state;
>     }
>     public String getFirst() {
>         return first;
>     }
>     public String getLast() {
>         return last;
>     }
>     public int getAge() {
>         return age;
>     }
>     public String getState() {
>         return state;
>     }
> }
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org