private void removeTargetFromDPP(MapWork target) { Set<String> dppIds = target.getEventSourceColumnNameMap().keySet(); for (String dppId : dppIds) { SparkPartitionPruningSinkOperator sink = idToDpps.get(dppId); Preconditions.checkNotNull(sink, "Unable to find DPP sink whose target work is removed."); SparkPartitionPruningSinkDesc desc = sink.getConf(); desc.removeTarget(target.getName()); // If the target can be removed, it means there's another MapWork that shares the same // DPP sink, and therefore it cannot be the only target. Preconditions.checkState(!desc.getTargetInfos().isEmpty(), "The removed target work is the only target."); } }
/**
 * Returns whether the two MapWorks are targets of the same DPP sink(s): they must share the
 * same set of pruning-source work ids and, per source, the same set of target column names
 * once the per-target id suffix is stripped.
 *
 * NOTE(review): this extract appears truncated — the loop header that introduces {@code source}
 * (presumably iterating over {@code sources1}) and several closing braces are not visible here;
 * confirm against the full file before editing.
 */
private boolean targetsOfSameDPPSink(MapWork first, MapWork second) {
  Set<String> sources1 = first.getEventSourceColumnNameMap().keySet();
  Set<String> sources2 = second.getEventSourceColumnNameMap().keySet();
  // Different source-id sets means the works cannot be fed by the same sinks.
  if (!sources1.equals(sources2)) {
    return false;
  // Compare column names with the target-id suffix removed, since the same sink records
  // distinct per-target names.
  Set<String> names1 = first.getEventSourceColumnNameMap().get(source).stream().map(
      SparkPartitionPruningSinkDesc::stripOffTargetId).collect(Collectors.toSet());
  Set<String> names2 = second.getEventSourceColumnNameMap().get(source).stream().map(
      SparkPartitionPruningSinkDesc::stripOffTargetId).collect(Collectors.toSet());
  if (!names1.equals(names2)) {
public void initialize(MapWork work, JobConf jobConf) throws SerDeException { Map<String, SourceInfo> columnMap = new HashMap<String, SourceInfo>(); Set<String> sourceWorkIds = work.getEventSourceTableDescMap().keySet(); for (String id : sourceWorkIds) { List<TableDesc> tables = work.getEventSourceTableDescMap().get(id); List<String> columnNames = work.getEventSourceColumnNameMap().get(id); List<ExprNodeDesc> partKeyExprs = work.getEventSourcePartKeyExprMap().get(id); Iterator<String> cit = columnNames.iterator(); Iterator<ExprNodeDesc> pit = partKeyExprs.iterator(); for (TableDesc t : tables) { String columnName = cit.next(); ExprNodeDesc partKeyExpr = pit.next(); SourceInfo si = new SourceInfo(t, partKeyExpr, columnName, jobConf); if (!sourceInfoMap.containsKey(id)) { sourceInfoMap.put(id, new ArrayList<SourceInfo>()); } sourceInfoMap.get(id).add(si); // We could have multiple sources restrict the same column, need to take // the union of the values in that case. if (columnMap.containsKey(columnName)) { si.values = columnMap.get(columnName).values; } columnMap.put(columnName, si); } } }
// Parallel lists for the same source-work id: the i-th table descriptor corresponds to the
// i-th target column name (see how initialize() iterates them in lock-step).
List<TableDesc> tables = work.getEventSourceTableDescMap().get(id);
List<String> columnNames = work.getEventSourceColumnNameMap().get(id);
/** * Add this DPP sink as a pruning source for the target MapWork. It means the DPP sink's output * will be used to prune a certain partition in the MapWork. The MapWork's event source maps will * be updated to remember the DPP sink's unique ID and corresponding target columns. */ public void addAsSourceEvent(MapWork mapWork, ExprNodeDesc partKey, String columnName, String columnType) { String sourceId = getUniqueId(); SparkPartitionPruningSinkDesc conf = getConf(); // store table descriptor in map-targetWork List<TableDesc> tableDescs = mapWork.getEventSourceTableDescMap().computeIfAbsent(sourceId, v -> new ArrayList<>()); tableDescs.add(conf.getTable()); // store partition key expr in map-targetWork List<ExprNodeDesc> partKeys = mapWork.getEventSourcePartKeyExprMap().computeIfAbsent(sourceId, v -> new ArrayList<>()); partKeys.add(partKey); // store column name in map-targetWork List<String> columnNames = mapWork.getEventSourceColumnNameMap().computeIfAbsent(sourceId, v -> new ArrayList<>()); columnNames.add(columnName); List<String> columnTypes = mapWork.getEventSourceColumnTypeMap().computeIfAbsent(sourceId, v -> new ArrayList<>()); columnTypes.add(columnType); }
// Remove columnName from this source's column list; when the last column is gone, drop the
// sourceId entry entirely so the map never holds empty lists.
// NOTE(review): fragment is truncated here — closing braces are not visible in this extract.
List<String> columnNames = mapWork.getEventSourceColumnNameMap().get(sourceId);
if (columnNames != null) {
  columnNames.remove(columnName);
  if (columnNames.isEmpty()) {
    mapWork.getEventSourceColumnNameMap().remove(sourceId);
// Target column names registered for pruning source s (null if s has no entry).
List<String> columnNames = work.getEventSourceColumnNameMap().get(s);
// Target column names registered for pruning source s (null if s has no entry).
List<String> columnNames = work.getEventSourceColumnNameMap().get(s);
// Ensure a column-name list exists for sourceName, then append the event's target column.
// NOTE(review): the '}' closing the if is not visible in this extract — presumably truncated.
if (!work.getEventSourceColumnNameMap().containsKey(sourceName)) {
  work.getEventSourceColumnNameMap().put(sourceName, new LinkedList<String>());
List<String> columns = work.getEventSourceColumnNameMap().get(sourceName);
columns.add(eventDesc.getTargetColumnName());
// Ensure a column-name list exists for sourceName, then append the event's target column.
// NOTE(review): the '}' closing the if is not visible in this extract — presumably truncated.
if (!work.getEventSourceColumnNameMap().containsKey(sourceName)) {
  work.getEventSourceColumnNameMap().put(sourceName, new LinkedList<String>());
List<String> columns = work.getEventSourceColumnNameMap().get(sourceName);
columns.add(eventDesc.getTargetColumnName());
// Mockito stubs: the mocked MapWork hands back the pre-built maps so the code under test
// can populate and inspect the event-source state through the usual accessors.
doReturn(columnMap).when(mapWork).getEventSourceColumnNameMap();
doReturn(exprMap).when(mapWork).getEventSourcePartKeyExprMap();
doReturn(typeMap).when(mapWork).getEventSourceColumnTypeMap();
// Ensure a column-name list exists for sourceId, then append the descriptor's target column.
// NOTE(review): the '}' closing the if is not visible in this extract — presumably truncated.
if (!targetWork.getEventSourceColumnNameMap().containsKey(sourceId)) {
  targetWork.getEventSourceColumnNameMap().put(sourceId, new LinkedList<String>());
List<String> columns = targetWork.getEventSourceColumnNameMap().get(sourceId);
columns.add(desc.getTargetColumnName());
// Target column names registered for pruning source s (null if s has no entry).
List<String> columnNames = work.getEventSourceColumnNameMap().get(s);
// Ensure a column-name list exists for sourceName, then append the event's target column.
// NOTE(review): the '}' closing the if is not visible in this extract — presumably truncated.
if (!work.getEventSourceColumnNameMap().containsKey(sourceName)) {
  work.getEventSourceColumnNameMap().put(sourceName, new LinkedList<String>());
List<String> columns = work.getEventSourceColumnNameMap().get(sourceName);
columns.add(eventDesc.getTargetColumnName());