protected Path getNominalPath(Path fpath) {
  Path nominal = null;
  boolean schemaless = fpath.toUri().getScheme() == null;
  for (Path onefile : conf.getPathToAliases().keySet()) {
    Path onepath = normalizePath(onefile, schemaless);
    Path curfpath = fpath;
    if (!schemaless && onepath.toUri().getScheme() == null) {
      curfpath = new Path(fpath.toUri().getPath());
    }
    // check for the operators who will process rows coming to this Map Operator
    if (onepath.toUri().relativize(curfpath.toUri()).equals(curfpath.toUri())) {
      // not from this
      continue;
    }
    if (nominal != null) {
      throw new IllegalStateException("Ambiguous input path " + fpath);
    }
    nominal = onefile;
    break;
  }
  if (nominal == null) {
    throw new IllegalStateException("Invalid input path " + fpath);
  }
  return nominal;
}
protected String getNominalPath(Path fpath) {
  Path nominal = null;
  boolean schemaless = fpath.toUri().getScheme() == null;
  for (Path onefile : conf.getPathToAliases().keySet()) {
    Path onepath = normalizePath(onefile, schemaless);
    Path curfpath = fpath;
    if (!schemaless && onepath.toUri().getScheme() == null) {
      curfpath = new Path(fpath.toUri().getPath());
    }
    // check for the operators who will process rows coming to this Map Operator
    if (onepath.toUri().relativize(curfpath.toUri()).equals(curfpath.toUri())) {
      // not from this
      continue;
    }
    if (nominal != null) {
      throw new IllegalStateException("Ambiguous input path " + fpath);
    }
    nominal = onefile;
    break;
  }
  if (nominal == null) {
    throw new IllegalStateException("Invalid input path " + fpath);
  }
  return nominal.toString();
}
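// Minimal, self-contained sketch (not Hive code) of the URI.relativize(...) idiom used
// in getNominalPath above: when the candidate directory is NOT an ancestor of the input
// path, relativize() returns the input URI unchanged, so equals(child) signals "not from
// this path" and the loop continues. The paths below are made-up examples for illustration.
import java.net.URI;

public class RelativizeCheckSketch {
  // true when 'child' does not live under 'dir' (mirrors the continue branch above)
  static boolean notUnder(URI dir, URI child) {
    return dir.relativize(child).equals(child);
  }

  public static void main(String[] args) {
    URI dir = URI.create("hdfs://nn:8020/warehouse/t1");
    URI inside = URI.create("hdfs://nn:8020/warehouse/t1/part-00000");
    URI outside = URI.create("hdfs://nn:8020/warehouse/t2/part-00000");

    System.out.println(notUnder(dir, inside));   // false: relativize yields "part-00000"
    System.out.println(notUnder(dir, outside));  // true: relativize returns the child as-is
  }
}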
private void addCredentials(MapWork mapWork, DAG dag) {
  Set<Path> paths = mapWork.getPathToAliases().keySet();
  if (!paths.isEmpty()) {
    Iterator<URI> pathIterator = Iterators.transform(paths.iterator(), new Function<Path, URI>() {
      @Override
      public URI apply(Path path) {
        return path.toUri();
      }
    });

    Set<URI> uris = new HashSet<URI>();
    Iterators.addAll(uris, pathIterator);

    if (LOG.isDebugEnabled()) {
      for (URI uri : uris) {
        LOG.debug("Marking URI as needing credentials: " + uri);
      }
    }
    dag.addURIsForCredentials(uris);
  }
}
/**
 * Hive uses tmp directories to capture the output of each FileSinkOperator.
 * This method creates all necessary tmp directories for FileSinks in the Mapwork.
 *
 * @param conf Used to get the right FileSystem
 * @param mWork Used to find FileSinkOperators
 * @throws IOException
 */
public static void createTmpDirs(Configuration conf, MapWork mWork) throws IOException {
  Map<Path, ArrayList<String>> pa = mWork.getPathToAliases();
  if (pa != null) {
    // common case: 1 table scan per map-work
    // rare case: smb joins
    HashSet<String> aliases = new HashSet<String>(1);
    List<Operator<? extends OperatorDesc>> ops =
        new ArrayList<Operator<? extends OperatorDesc>>();
    for (List<String> ls : pa.values()) {
      for (String a : ls) {
        aliases.add(a);
      }
    }
    for (String a : aliases) {
      ops.add(mWork.getAliasToWork().get(a));
    }
    createTmpDirs(conf, ops);
  }
}
/**
 * Hive uses tmp directories to capture the output of each FileSinkOperator.
 * This method creates all necessary tmp directories for FileSinks in the Mapwork.
 *
 * @param conf Used to get the right FileSystem
 * @param mWork Used to find FileSinkOperators
 * @throws IOException
 */
public static void createTmpDirs(Configuration conf, MapWork mWork) throws IOException {
  Map<Path, ArrayList<String>> pa = mWork.getPathToAliases();
  if (MapUtils.isNotEmpty(pa)) {
    // common case: 1 table scan per map-work
    // rare case: smb joins
    HashSet<String> aliases = new HashSet<String>(1);
    List<Operator<? extends OperatorDesc>> ops =
        new ArrayList<Operator<? extends OperatorDesc>>();
    for (List<String> ls : pa.values()) {
      for (String a : ls) {
        aliases.add(a);
      }
    }
    for (String a : aliases) {
      ops.add(mWork.getAliasToWork().get(a));
    }
    createTmpDirs(conf, ops);
  }
}
private void processAlias(MapWork work, Set<TableScanOperator> tableScans) {
  Set<String> aliases = new HashSet<>();
  for (TableScanOperator tso : tableScans) {
    // use LinkedHashMap<String, Operator<? extends OperatorDesc>>
    // getAliasToWork()
    // should not apply this for non-native table
    if (tso.getConf().getTableMetadata().getStorageHandler() != null) {
      continue;
    }
    String alias = getAliasForTableScanOperator(work, tso);
    aliases.add(alias);
    tso.getConf().setIsMetadataOnly(true);
  }
  // group path alias according to work
  Map<Path, ArrayList<String>> candidates = new HashMap<>();
  for (Path path : work.getPaths()) {
    ArrayList<String> aliasesAffected = work.getPathToAliases().get(path);
    if (CollectionUtils.isNotEmpty(aliasesAffected)) {
      candidates.put(path, aliasesAffected);
    }
  }
  for (Entry<Path, ArrayList<String>> entry : candidates.entrySet()) {
    processAlias(work, entry.getKey(), entry.getValue(), aliases);
  }
}
private static void updatePathForMapWork(Path newPath, MapWork work, Path path) {
  // update the work
  if (!newPath.equals(path)) {
    PartitionDesc partDesc = work.getPathToPartitionInfo().get(path);
    work.addPathToAlias(newPath, work.getPathToAliases().get(path));
    work.removePathToAlias(path);
    work.removePathToPartitionInfo(path);
    work.addPathToPartitionInfo(newPath, partDesc);
  }
}
Iterator<Path> paths = mapWork.getPathToAliases().keySet().iterator();
if (paths.hasNext()) {
  PartitionDesc partDesc = mapWork.getPathToPartitionInfo().get(paths.next());
@Override
public MapWork read(Kryo kryo, Input input, Class<MapWork> type) {
  MapWork mapWork = super.read(kryo, input, type);
  // The set methods in MapWork intern any duplicate strings, which is why we call them
  // during de-serialization
  mapWork.setPathToPartitionInfo(mapWork.getPathToPartitionInfo());
  mapWork.setPathToAliases(mapWork.getPathToAliases());
  return mapWork;
}
}
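// Minimal sketch (not Hive code) of the interning idea behind the Kryo read(...) override
// above: deserialization can materialize many distinct String objects with identical
// contents, and String.intern() collapses them onto one shared pooled instance, so equal
// path/alias strings stop being stored twice. The example value below is made up.
public class InternSketch {
  public static void main(String[] args) {
    // Simulate two equal strings arriving as separate objects, as after deserialization.
    String a = new String("hdfs://nn:8020/warehouse/t1");
    String b = new String("hdfs://nn:8020/warehouse/t1");

    System.out.println(a == b);                    // false: equal contents, distinct objects
    System.out.println(a.intern() == b.intern());  // true: both resolve to the same pooled instance
  }
}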
Map<TableDesc, StructObjectInspector> convertedOI = getConvertedOI(tableNameToConf);
for (Map.Entry<Path, ArrayList<String>> entry : conf.getPathToAliases().entrySet()) {
  Path onefile = entry.getKey();
  List<String> aliases = entry.getValue();
private void processAlias(MapWork work, HashSet<TableScanOperator> tableScans) {
  ArrayList<String> aliases = new ArrayList<String>();
  for (TableScanOperator tso : tableScans) {
    // use LinkedHashMap<String, Operator<? extends OperatorDesc>>
    // getAliasToWork()
    // should not apply this for non-native table
    if (tso.getConf().getTableMetadata().getStorageHandler() != null) {
      continue;
    }
    String alias = getAliasForTableScanOperator(work, tso);
    aliases.add(alias);
    tso.getConf().setIsMetadataOnly(true);
  }
  // group path alias according to work
  LinkedHashMap<Path, ArrayList<String>> candidates = new LinkedHashMap<>();
  for (Path path : work.getPaths()) {
    ArrayList<String> aliasesAffected = work.getPathToAliases().get(path);
    if (aliasesAffected != null && aliasesAffected.size() > 0) {
      candidates.put(path, aliasesAffected);
    }
  }
  for (Entry<Path, ArrayList<String>> entry : candidates.entrySet()) {
    processAlias(work, entry.getKey(), entry.getValue(), aliases);
  }
}
Map<String, Configuration> tableNameToConf = new HashMap<>();
for (Map.Entry<Path, ArrayList<String>> e : conf.getPathToAliases().entrySet()) {
  List<String> aliases = e.getValue();
  if (aliases == null || aliases.isEmpty()) {
@SuppressWarnings("rawtypes") private static Path createDummyFileForEmptyTable(JobConf job, MapWork work, Path hiveScratchDir, String alias) throws Exception { TableDesc tableDesc = work.getAliasToPartnInfo().get(alias).getTableDesc(); if (tableDesc.isNonNative()) { // if it does not need native storage, we can't create an empty file for it. return null; } Properties props = tableDesc.getProperties(); HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, tableDesc); Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, props, false); LOG.info("Changed input file for alias {} to newPath", alias, newPath); // update the work LinkedHashMap<Path, ArrayList<String>> pathToAliases = work.getPathToAliases(); ArrayList<String> newList = new ArrayList<String>(1); newList.add(alias); pathToAliases.put(newPath, newList); work.setPathToAliases(pathToAliases); PartitionDesc pDesc = work.getAliasToPartnInfo().get(alias).clone(); work.addPathToPartitionInfo(newPath, pDesc); return newPath; }
@Test
public void testGetAndSetConsistency() {
  MapWork mw = new MapWork();
  LinkedHashMap<Path, ArrayList<String>> pathToAliases = new LinkedHashMap<>();
  pathToAliases.put(new Path("p0"), Lists.newArrayList("a1", "a2"));
  mw.setPathToAliases(pathToAliases);

  LinkedHashMap<Path, ArrayList<String>> pta = mw.getPathToAliases();
  assertEquals(pathToAliases, pta);
}
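// A hedged companion sketch in the style of the test above, not an existing Hive test:
// it assumes the addPathToAlias(Path, ArrayList<String>) overload used in the other
// snippets here, and that a fresh MapWork starts with an empty (non-null) pathToAliases
// map, so a path added that way is visible through getPathToAliases().
@Test
public void testAddPathToAliasVisible() {
  MapWork mw = new MapWork();
  mw.addPathToAlias(new Path("p1"), Lists.newArrayList("a3"));

  assertEquals(Lists.newArrayList("a3"), mw.getPathToAliases().get(new Path("p1")));
}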
@SuppressWarnings("rawtypes") private static Path createDummyFileForEmptyTable(JobConf job, MapWork work, Path hiveScratchDir, String alias) throws Exception { TableDesc tableDesc = work.getAliasToPartnInfo().get(alias).getTableDesc(); if (tableDesc.isNonNative()) { // if it does not need native storage, we can't create an empty file for it. return null; } Properties props = tableDesc.getProperties(); HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, tableDesc); Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, props, false); if (LOG.isInfoEnabled()) { LOG.info("Changed input file for alias " + alias + " to " + newPath); } // update the work LinkedHashMap<Path, ArrayList<String>> pathToAliases = work.getPathToAliases(); ArrayList<String> newList = new ArrayList<String>(); newList.add(alias); pathToAliases.put(newPath, newList); work.setPathToAliases(pathToAliases); PartitionDesc pDesc = work.getAliasToPartnInfo().get(alias).clone(); work.addPathToPartitionInfo(newPath, pDesc); return newPath; }
Path taskTmpDirPath = new Path(taskTmpDir);
MapWork mWork = plan.getMapWork();
if (!mWork.getPathToAliases().containsKey(taskTmpDirPath)) {
  taskTmpDir = taskTmpDir.intern();
  StringInternUtils.internUriStringsInPath(taskTmpDirPath);
TableDesc tt_desc = tt_descLst.get(pos);
MapWork mWork = plan.getMapWork();
if (mWork.getPathToAliases().get(taskTmpDir) == null) {
  taskTmpDir = taskTmpDir.intern();
  Path taskTmpDirPath = StringInternUtils.internUriStringsInPath(new Path(taskTmpDir));
@SuppressWarnings("rawtypes") private static Path createDummyFileForEmptyPartition(Path path, JobConf job, MapWork work, Path hiveScratchDir) throws Exception { String strPath = path.toString(); // The input file does not exist, replace it by a empty file PartitionDesc partDesc = work.getPathToPartitionInfo().get(path); if (partDesc.getTableDesc().isNonNative()) { // if this isn't a hive table we can't create an empty file for it. return path; } Properties props = SerDeUtils.createOverlayedProperties( partDesc.getTableDesc().getProperties(), partDesc.getProperties()); HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, partDesc); boolean oneRow = partDesc.getInputFileFormatClass() == OneNullRowInputFormat.class; Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, props, oneRow); if (LOG.isInfoEnabled()) { LOG.info("Changed input file " + strPath + " to empty file " + newPath + " (" + oneRow + ")"); } // update the work work.addPathToAlias(newPath, work.getPathToAliases().get(path)); work.removePathToAlias(path); work.removePathToPartitionInfo(path); work.addPathToPartitionInfo(newPath, partDesc); return newPath; }