target.setPathToPartitionInfo(targetPathToPartitionInfo);
/**
 * Create a new plan and return it. The plan won't contain the name-to-split-sample
 * information from the parse context.
 *
 * @return the new plan
 */
public static MapredWork getMapRedWorkFromConf(HiveConf conf) {
  MapredWork mrWork = new MapredWork();
  MapWork work = mrWork.getMapWork();
  boolean mapperCannotSpanPartns = conf.getBoolVar(
      HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
  work.setMapperCannotSpanPartns(mapperCannotSpanPartns);
  work.setPathToAliases(new LinkedHashMap<Path, ArrayList<String>>());
  work.setPathToPartitionInfo(new LinkedHashMap<Path, PartitionDesc>());
  work.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>());
  return mrWork;
}
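// A minimal usage sketch (not from the Hive source; the alias name and input
// path below are hypothetical): build an empty MapredWork from the session
// conf, then register a single input path against an alias. The maps returned
// by the getters are the same LinkedHashMaps installed above, so they can be
// populated in place.
HiveConf conf = new HiveConf();
MapredWork mrWork = getMapRedWorkFromConf(conf);
MapWork work = mrWork.getMapWork();
Path inputPath = new Path("/tmp/example_input");  // hypothetical path
ArrayList<String> aliases = new ArrayList<String>();
aliases.add("example_alias");                     // hypothetical alias
work.getPathToAliases().put(inputPath, aliases);
work.getPathToPartitionInfo().put(inputPath, new PartitionDesc(Utilities.defaultTd, null));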
@Override
public MapWork read(Kryo kryo, Input input, Class<MapWork> type) {
  MapWork mapWork = super.read(kryo, input, type);
  // The set methods in MapWork intern any duplicate strings, which is why we call them
  // during de-serialization.
  mapWork.setPathToPartitionInfo(mapWork.getPathToPartitionInfo());
  mapWork.setPathToAliases(mapWork.getPathToAliases());
  return mapWork;
}
}
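// A registration sketch (a guess at the wiring, not the actual Hive code;
// MapWorkSerializer is assumed to be the FieldSerializer subclass that the
// read(...) override above belongs to): registering it makes Kryo run the
// interning setters on every de-serialized MapWork.
Kryo kryo = new Kryo();
kryo.register(MapWork.class, new MapWorkSerializer(kryo, MapWork.class));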
@Override
protected void setUp() throws IOException {
  conf = new Configuration();
  job = new JobConf(conf);

  TableDesc tblDesc = Utilities.defaultTd;
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Utilities.setMapRedWork(job, mrwork,
      new Path("/tmp/" + System.getProperty("user.name"), "hive"));

  fileSystem = FileSystem.getLocal(conf);
  testDir = new Path(System.getProperty("test.tmp.dir",
      System.getProperty("user.dir", new File(".").getAbsolutePath()))
      + "/TestSymlinkTextInputFormat");
  reporter = Reporter.NULL;
  fileSystem.delete(testDir, true);

  dataDir1 = new Path(testDir, "datadir1");
  dataDir2 = new Path(testDir, "datadir2");
  symlinkDir = new Path(testDir, "symlinkdir");
}
public void testAvoidSplitCombination() throws Exception {
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf);

  TableDesc tblDesc = Utilities.defaultTd;
  tblDesc.setInputFileFormatClass(TestSkipCombineInputFormat.class);
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder1"), partDesc);
  pt.put(new Path("/tmp/testfolder2"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Path mapWorkPath = new Path("/tmp/" + System.getProperty("user.name"), "hive");
  Utilities.setMapRedWork(conf, mrwork, mapWorkPath);

  try {
    Path[] paths = new Path[2];
    paths[0] = new Path("/tmp/testfolder1");
    paths[1] = new Path("/tmp/testfolder2");
    CombineHiveInputFormat combineInputFormat =
        ReflectionUtils.newInstance(CombineHiveInputFormat.class, conf);
    combineInputFormat.pathToPartitionInfo =
        Utilities.getMapWork(conf).getPathToPartitionInfo();
    Set results = combineInputFormat.getNonCombinablePathIndices(job, paths, 2);
    assertEquals("Should have both path indices in the results set", 2, results.size());
  } finally {
    // Clean up the mapwork path.
    FileSystem.get(conf).delete(mapWorkPath, true);
  }
}
mapWork1.setAliasToWork(new LinkedHashMap<>(
    ImmutableMap.of(alias1Name, (Operator<?>) mock(Operator.class))));
mapWork1.setPathToPartitionInfo(new LinkedHashMap<>(
    ImmutableMap.of(nonExistentPath1, mockPartitionDesc)));
mapWork2.setAliasToWork(new LinkedHashMap<>(
    ImmutableMap.of(alias2Name, (Operator<?>) mock(Operator.class))));
mapWork2.setPathToPartitionInfo(new LinkedHashMap<>(
    ImmutableMap.of(nonExistentPath2, mockPartitionDesc)));
mapWork.setPathToPartitionInfo(pathToPartitionInfo);
private void init() throws IOException {
  conf = new JobConf();
  resetIOContext();
  rcfReader = mock(RCFileRecordReader.class);
  when(rcfReader.next((LongWritable) anyObject(),
      (BytesRefArrayWritable) anyObject())).thenReturn(true);
  // Since the start is 0 and the length is 100, the first call to sync should be with the
  // value 50, so return that for getPos().
  when(rcfReader.getPos()).thenReturn(50L);
  conf.setBoolean("hive.input.format.sorted", true);

  TableDesc tblDesc = Utilities.defaultTd;
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Utilities.setMapRedWork(conf, mrwork,
      new Path("/tmp/" + System.getProperty("user.name"), "hive"));

  hiveSplit = new TestHiveInputSplit();
  hbsReader = new TestHiveRecordReader(rcfReader, conf);
  hbsReader.initIOContext(hiveSplit, conf, Class.class, rcfReader);
}
mapWork.setPathToPartitionInfo(partMap);
mrwork.getMapWork().setPathToPartitionInfo(pt);
mrwork.getMapWork().setAliasToWork(ao);
mrwork.getMapWork().setPathToPartitionInfo(pathToPartitionInfo);
mrwork.getMapWork().setAliasToWork(aliasToWork);
@SuppressWarnings("rawtypes") private static Path createDummyFileForEmptyTable(JobConf job, MapWork work, Path hiveScratchDir, String alias, int sequenceNumber) throws Exception { TableDesc tableDesc = work.getAliasToPartnInfo().get(alias).getTableDesc(); if (tableDesc.isNonNative()) { // if this isn't a hive table we can't create an empty file for it. return null; } Properties props = tableDesc.getProperties(); HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, tableDesc); Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, sequenceNumber, props, false); if (LOG.isInfoEnabled()) { LOG.info("Changed input file for alias " + alias + " to " + newPath); } // update the work LinkedHashMap<String, ArrayList<String>> pathToAliases = work.getPathToAliases(); ArrayList<String> newList = new ArrayList<String>(); newList.add(alias); pathToAliases.put(newPath.toUri().toString(), newList); work.setPathToAliases(pathToAliases); LinkedHashMap<String, PartitionDesc> pathToPartitionInfo = work.getPathToPartitionInfo(); PartitionDesc pDesc = work.getAliasToPartnInfo().get(alias).clone(); pathToPartitionInfo.put(newPath.toUri().toString(), pDesc); work.setPathToPartitionInfo(pathToPartitionInfo); return newPath; }
/**
 * Create a new plan and return it. The plan won't contain the name-to-split-sample
 * information from the parse context.
 *
 * @return the new plan
 */
public static MapredWork getMapRedWorkFromConf(HiveConf conf) {
  MapredWork mrWork = new MapredWork();
  MapWork work = mrWork.getMapWork();
  boolean mapperCannotSpanPartns = conf.getBoolVar(
      HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
  work.setMapperCannotSpanPartns(mapperCannotSpanPartns);
  work.setPathToAliases(new LinkedHashMap<String, ArrayList<String>>());
  work.setPathToPartitionInfo(new LinkedHashMap<String, PartitionDesc>());
  work.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>());
  work.setHadoopSupportsSplittable(
      conf.getBoolVar(HiveConf.ConfVars.HIVE_COMBINE_INPUT_FORMAT_SUPPORTS_SPLITTABLE));
  return mrWork;
}
pathToPartitionInfo.put(strNewPath, pathToPartitionInfo.get(strPath));
pathToPartitionInfo.remove(strPath);
work.setPathToPartitionInfo(pathToPartitionInfo);
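// A small helper sketch generalizing the rename pattern above (the method
// name is an assumption, not Hive API): move a partition entry from one
// path key to another, then write the map back so MapWork can re-intern it.
private static void movePartitionInfo(MapWork work, String oldKey, String newKey) {
  LinkedHashMap<String, PartitionDesc> info = work.getPathToPartitionInfo();
  PartitionDesc desc = info.remove(oldKey);
  if (desc != null) {
    info.put(newKey, desc);
  }
  work.setPathToPartitionInfo(info);
}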