private void removeTargetFromDPP(MapWork target) { Set<String> dppIds = target.getEventSourceColumnNameMap().keySet(); for (String dppId : dppIds) { SparkPartitionPruningSinkOperator sink = idToDpps.get(dppId); Preconditions.checkNotNull(sink, "Unable to find DPP sink whose target work is removed."); SparkPartitionPruningSinkDesc desc = sink.getConf(); desc.removeTarget(target.getName()); // If the target can be removed, it means there's another MapWork that shares the same // DPP sink, and therefore it cannot be the only target. Preconditions.checkState(!desc.getTargetInfos().isEmpty(), "The removed target work is the only target."); } }
/**
 * Returns whether the two MapWorks are targets of the same DPP sink(s): they must share the
 * same set of pruning-source work ids and, per source, the same set of target column names
 * once the per-target id suffix is stripped.
 *
 * NOTE(review): this extract appears truncated — the loop header that introduces {@code source}
 * (presumably iterating over {@code sources1}) and several closing braces are not visible here;
 * confirm against the full file before editing.
 */
private boolean targetsOfSameDPPSink(MapWork first, MapWork second) {
  Set<String> sources1 = first.getEventSourceColumnNameMap().keySet();
  Set<String> sources2 = second.getEventSourceColumnNameMap().keySet();
  // Different source-id sets means the works cannot be fed by the same sinks.
  if (!sources1.equals(sources2)) {
    return false;
  // Compare column names with the target-id suffix removed, since the same sink records
  // distinct per-target names.
  Set<String> names1 = first.getEventSourceColumnNameMap().get(source).stream().map(
      SparkPartitionPruningSinkDesc::stripOffTargetId).collect(Collectors.toSet());
  Set<String> names2 = second.getEventSourceColumnNameMap().get(source).stream().map(
      SparkPartitionPruningSinkDesc::stripOffTargetId).collect(Collectors.toSet());
  if (!names1.equals(names2)) {
public void initialize(MapWork work, JobConf jobConf) throws SerDeException { Map<String, SourceInfo> columnMap = new HashMap<String, SourceInfo>(); Set<String> sourceWorkIds = work.getEventSourceTableDescMap().keySet(); for (String id : sourceWorkIds) { List<TableDesc> tables = work.getEventSourceTableDescMap().get(id); List<String> columnNames = work.getEventSourceColumnNameMap().get(id); List<ExprNodeDesc> partKeyExprs = work.getEventSourcePartKeyExprMap().get(id); Iterator<String> cit = columnNames.iterator(); Iterator<ExprNodeDesc> pit = partKeyExprs.iterator(); for (TableDesc t : tables) { String columnName = cit.next(); ExprNodeDesc partKeyExpr = pit.next(); SourceInfo si = new SourceInfo(t, partKeyExpr, columnName, jobConf); if (!sourceInfoMap.containsKey(id)) { sourceInfoMap.put(id, new ArrayList<SourceInfo>()); } sourceInfoMap.get(id).add(si); // We could have multiple sources restrict the same column, need to take // the union of the values in that case. if (columnMap.containsKey(columnName)) { si.values = columnMap.get(columnName).values; } columnMap.put(columnName, si); } } }
// Parallel lists for the same source-work id: the i-th table descriptor corresponds to the
// i-th target column name (see how initialize() iterates them in lock-step).
List<TableDesc> tables = work.getEventSourceTableDescMap().get(id);
List<String> columnNames = work.getEventSourceColumnNameMap().get(id);
/** * Add this DPP sink as a pruning source for the target MapWork. It means the DPP sink's output * will be used to prune a certain partition in the MapWork. The MapWork's event source maps will * be updated to remember the DPP sink's unique ID and corresponding target columns. */ public void addAsSourceEvent(MapWork mapWork, ExprNodeDesc partKey, String columnName, String columnType) { String sourceId = getUniqueId(); SparkPartitionPruningSinkDesc conf = getConf(); // store table descriptor in map-targetWork List<TableDesc> tableDescs = mapWork.getEventSourceTableDescMap().computeIfAbsent(sourceId, v -> new ArrayList<>()); tableDescs.add(conf.getTable()); // store partition key expr in map-targetWork List<ExprNodeDesc> partKeys = mapWork.getEventSourcePartKeyExprMap().computeIfAbsent(sourceId, v -> new ArrayList<>()); partKeys.add(partKey); // store column name in map-targetWork List<String> columnNames = mapWork.getEventSourceColumnNameMap().computeIfAbsent(sourceId, v -> new ArrayList<>()); columnNames.add(columnName); List<String> columnTypes = mapWork.getEventSourceColumnTypeMap().computeIfAbsent(sourceId, v -> new ArrayList<>()); columnTypes.add(columnType); }
// Remove columnName from this source's column list; when the last column is gone, drop the
// sourceId entry entirely so the map never holds empty lists.
// NOTE(review): fragment is truncated here — closing braces are not visible in this extract.
List<String> columnNames = mapWork.getEventSourceColumnNameMap().get(sourceId);
if (columnNames != null) {
  columnNames.remove(columnName);
  if (columnNames.isEmpty()) {
    mapWork.getEventSourceColumnNameMap().remove(sourceId);
// Target column names registered for pruning source s (null if s has no entry).
List<String> columnNames = work.getEventSourceColumnNameMap().get(s);
// Target column names registered for pruning source s (null if s has no entry).
List<String> columnNames = work.getEventSourceColumnNameMap().get(s);
// Ensure a column-name list exists for sourceName, then append the event's target column.
// NOTE(review): the '}' closing the if is not visible in this extract — presumably truncated.
if (!work.getEventSourceColumnNameMap().containsKey(sourceName)) {
  work.getEventSourceColumnNameMap().put(sourceName, new LinkedList<String>());
List<String> columns = work.getEventSourceColumnNameMap().get(sourceName);
columns.add(eventDesc.getTargetColumnName());
// Ensure a column-name list exists for sourceName, then append the event's target column.
// NOTE(review): the '}' closing the if is not visible in this extract — presumably truncated.
if (!work.getEventSourceColumnNameMap().containsKey(sourceName)) {
  work.getEventSourceColumnNameMap().put(sourceName, new LinkedList<String>());
List<String> columns = work.getEventSourceColumnNameMap().get(sourceName);
columns.add(eventDesc.getTargetColumnName());
// Mockito stubs: the mocked MapWork hands back the pre-built maps so the code under test
// can populate and inspect the event-source state through the usual accessors.
doReturn(columnMap).when(mapWork).getEventSourceColumnNameMap();
doReturn(exprMap).when(mapWork).getEventSourcePartKeyExprMap();
doReturn(typeMap).when(mapWork).getEventSourceColumnTypeMap();
// Ensure a column-name list exists for sourceId, then append the descriptor's target column.
// NOTE(review): the '}' closing the if is not visible in this extract — presumably truncated.
if (!targetWork.getEventSourceColumnNameMap().containsKey(sourceId)) {
  targetWork.getEventSourceColumnNameMap().put(sourceId, new LinkedList<String>());
List<String> columns = targetWork.getEventSourceColumnNameMap().get(sourceId);
columns.add(desc.getTargetColumnName());
// Target column names registered for pruning source s (null if s has no entry).
List<String> columnNames = work.getEventSourceColumnNameMap().get(s);
// Ensure a column-name list exists for sourceName, then append the event's target column.
// NOTE(review): the '}' closing the if is not visible in this extract — presumably truncated.
if (!work.getEventSourceColumnNameMap().containsKey(sourceName)) {
  work.getEventSourceColumnNameMap().put(sourceName, new LinkedList<String>());
List<String> columns = work.getEventSourceColumnNameMap().get(sourceName);
columns.add(eventDesc.getTargetColumnName());