Posted to issues@flink.apache.org by GitBox <gi...@apache.org> on 2019/06/10 16:56:41 UTC

[GitHub] [flink] xuefuz commented on a change in pull request #8614: [FLINK-12727][hive] Make HiveTableOutputFormat support writing partitioned table

URL: https://github.com/apache/flink/pull/8614#discussion_r292096730
 
 

 ##########
 File path: flink-connectors/flink-connector-hive/src/main/java/org/apache/flink/batch/connectors/hive/HiveTableOutputFormat.java
 ##########
 @@ -268,11 +293,52 @@ public void writeRecord(Row record) throws IOException {
 		try {
 			HivePartitionWriter partitionWriter = staticWriter;
 			if (isDynamicPartition) {
-				// TODO: to be implemented
+				LinkedHashMap<String, String> dynPartSpec = new LinkedHashMap<>();
+				// only need to check the dynamic partitions
+				final int numStaticPart = hiveTablePartition.getPartitionSpec().size();
+				for (int i = dynPartOffset; i < record.getArity(); i++) {
+					// TODO: seems Hive also just calls toString(), need further investigation to confirm
+					// TODO: validate partition value
+					String partVal = record.getField(i).toString();
+					dynPartSpec.put(partitionCols.get(i - dynPartOffset + numStaticPart), partVal);
+				}
+				String partName = Warehouse.makePartPath(dynPartSpec);
+				partitionWriter = partitionToWriter.get(partName);
+				if (partitionWriter == null) {
+					String stagingDir = hiveTablePartition.getStorageDescriptor().getLocation();
+					partitionWriter = writerForLocation(stagingDir + Path.SEPARATOR + partName);
+					partitionToWriter.put(partName, partitionWriter);
+				}
 			}
 			partitionWriter.recordWriter.write(serializer.serialize(getConvertedRow(record), rowObjectInspector));
 		} catch (IOException | SerDeException e) {
 			throw new IOException("Could not write Record.", e);
+		} catch (MetaException e) {
+			throw new CatalogException(e);
+		}
+	}
+
+	// load a single partition
+	private void loadPartition(Path srcDir, Table table, Map<String, String> partSpec, HiveMetastoreClientWrapper client)
+			throws TException, IOException {
+		Path tblLocation = new Path(table.getSd().getLocation());
+		List<Partition> existingPart = client.listPartitions(dbName, tableName,
+				new ArrayList<>(partSpec.values()), (short) 1);
+		Path destDir = existingPart.isEmpty() ? new Path(tblLocation, Warehouse.makePartPath(partSpec)) :
+				new Path(existingPart.get(0).getSd().getLocation());
+		moveFiles(srcDir, destDir);
+		// register new partition if it doesn't exist
+		if (existingPart.isEmpty()) {
+			Partition partition = new Partition();
+			partition.setValues(new ArrayList<>(partSpec.values()));
+			StorageDescriptor sd = new StorageDescriptor(hiveTablePartition.getStorageDescriptor());
+			sd.setLocation(destDir.toString());
+			partition.setSd(sd);
+			partition.setDbName(dbName);
+			partition.setTableName(tableName);
+			partition.setCreateTime((int) System.currentTimeMillis());
+			partition.setLastAccessTime((int) System.currentTimeMillis());
 
 Review comment:
   Nit: We can save one system call by calling System.currentTimeMillis() once and reusing the value for both setCreateTime and setLastAccessTime.
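   For illustration, a minimal sketch of what the suggested change could look like in place of the two quoted lines. The division by 1000 is my own assumption, not part of this patch: the metastore Partition's createTime/lastAccessTime fields are i32 seconds since the epoch, so the narrowing cast of raw milliseconds would also overflow.

       // Sketch only, not the final patch: compute the timestamp once and reuse it.
       // Assumes metastore times are seconds since epoch, hence the / 1000
       // before the narrowing cast to int.
       int currentTime = (int) (System.currentTimeMillis() / 1000);
       partition.setCreateTime(currentTime);
       partition.setLastAccessTime(currentTime);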

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services