You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@hive.apache.org by εΎει <xu...@gmail.com> on 2011/03/24 07:35:54 UTC
How do I write a SerDe? I want to support Nutch sequence files.
Can anyone help me, or point me to some related documentation?
I want to load data from Nutch's sequence files.
My code is below.
I can run the LOAD DATA script normally,
but when I run 'select * from table', an error occurs.
It reports a cast exception for 'Content' at the line 'Iterator<Writable> values =
(Iterator<Writable>)blob;'.
Thanks for your help!
public class NutchSequenceFileSerDe implements SerDe {
public static final Log LOG =
LogFactory.getLog(NutchSequenceFileSerDe.class.getName());
int numColumns;
String inputRegex;
String outputFormatString;
Pattern inputPattern;
StructObjectInspector rowOI;
ArrayList<String> row;
@Override
public void initialize(Configuration conf, Properties tbl)
throws SerDeException {
// We can get the table definition from tbl.
// Read the configuration parameters
String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
String columnTypeProperty =
tbl.getProperty(Constants.LIST_COLUMN_TYPES);
List<String> columnNames = Arrays.asList(columnNameProperty.split(","));
List<TypeInfo> columnTypes = TypeInfoUtils
.getTypeInfosFromTypeString(columnTypeProperty);
assert columnNames.size() == columnTypes.size();
numColumns = columnNames.size();
// All columns have to be of type STRING.
for (int c = 0; c < numColumns; c++) {
if (!columnTypes.get(c).equals(TypeInfoFactory.stringTypeInfo)) {
throw new SerDeException(getClass().getName()
+ " only accepts string columns, but column[" + c + "] named "
+ columnNames.get(c) + " has type " + columnTypes.get(c));
}
}
// Constructing the row ObjectInspector:
// The row consists of some string columns, each column will be a java
// String object.
List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(
columnNames.size());
for (int c = 0; c < numColumns; c++) {
columnOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
}
// StandardStruct uses ArrayList to store the row.
rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
columnNames, columnOIs);
// Constructing the row object, etc, which will be reused for all rows.
row = new ArrayList<String>(numColumns);
for (int c = 0; c < numColumns; c++) {
row.add(null);
}
outputFields = new Object[numColumns];
outputRowText = new Text();
}
@Override
public ObjectInspector getObjectInspector() throws SerDeException {
return rowOI;
}
@Override
public Class<? extends Writable> getSerializedClass() {
return Text.class;
}
// Number of rows not matching the regex
long unmatchedRows = 0;
long nextUnmatchedRows = 1;
// Number of rows that match the regex but have missing groups.
long partialMatchedRows = 0;
long nextPartialMatchedRows = 1;
long getNextNumberToDisplay(long now) {
return now * 10;
}
@Override
public Object deserialize(Writable blob) throws SerDeException {
Iterator<Writable> values = (Iterator<Writable>)blob;
//dump.append(key.toString() + "\001");
int colIndex =0;
while (values.hasNext()) {
try {
Writable value = values.next();//.get(); // unwrap
String vString = value.toString();
row.set(colIndex,vString);
} catch (RuntimeException e) {
row.set(colIndex, null);
}
colIndex ++;
}
return row;
}
Object[] outputFields;
Text outputRowText;
@Override
public Writable serialize(Object obj, ObjectInspector objInspector)
throws SerDeException {
// Get all the fields out.
// NOTE: The correct way to get fields out of the row is to use
// objInspector.
// The obj can be a Java ArrayList, or a Java class, or a byte[] or
// whatever.
// The only way to access the data inside the obj is through
// ObjectInspector.
StructObjectInspector outputRowOI = (StructObjectInspector)
objInspector;
List<? extends StructField> outputFieldRefs = outputRowOI
.getAllStructFieldRefs();
if (outputFieldRefs.size() != numColumns) {
throw new SerDeException("Cannot serialize the object because there
are "
+ outputFieldRefs.size() + " fields but the table has " +
numColumns
+ " columns.");
}
// Get all data out.
for (int c = 0; c < numColumns; c++) {
Object field = outputRowOI
.getStructFieldData(obj, outputFieldRefs.get(c));
ObjectInspector fieldOI = outputFieldRefs.get(c)
.getFieldObjectInspector();
// The data must be of type String
StringObjectInspector fieldStringOI = (StringObjectInspector) fieldOI;
// Convert the field to Java class String, because objects of String
type
// can be
// stored in String, Text, or some other classes.
outputFields[c] = fieldStringOI.getPrimitiveJavaObject(field);
}
// Format the String
String outputRowString = "";
for (Object o : outputFieldRefs) {
outputRowString += o.toString();
}
outputRowText.set(outputRowString);
return outputRowText;
}
}