target.setPathToPartitionInfo(targetPathToPartitionInfo);
/**
 * Create a new plan and return it. The plan won't contain the name-to-split-sample
 * information from the parse context.
 *
 * @return the new plan
 */
public static MapredWork getMapRedWorkFromConf(HiveConf conf) {
  MapredWork mrWork = new MapredWork();
  MapWork work = mrWork.getMapWork();
  boolean mapperCannotSpanPartns = conf.getBoolVar(
      HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
  work.setMapperCannotSpanPartns(mapperCannotSpanPartns);
  work.setPathToAliases(new LinkedHashMap<Path, ArrayList<String>>());
  work.setPathToPartitionInfo(new LinkedHashMap<Path, PartitionDesc>());
  work.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>());
  return mrWork;
}
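// A minimal usage sketch (not from the Hive source; the alias name and input
// path below are hypothetical): build an empty MapredWork from the session
// conf, then register a single input path against an alias. The maps returned
// by the getters are the same LinkedHashMaps installed above, so they can be
// populated in place.
HiveConf conf = new HiveConf();
MapredWork mrWork = getMapRedWorkFromConf(conf);
MapWork work = mrWork.getMapWork();
Path inputPath = new Path("/tmp/example_input");  // hypothetical path
ArrayList<String> aliases = new ArrayList<String>();
aliases.add("example_alias");                     // hypothetical alias
work.getPathToAliases().put(inputPath, aliases);
work.getPathToPartitionInfo().put(inputPath, new PartitionDesc(Utilities.defaultTd, null));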
@Override
public MapWork read(Kryo kryo, Input input, Class<MapWork> type) {
  MapWork mapWork = super.read(kryo, input, type);
  // The set methods in MapWork intern any duplicate strings, which is why we call them
  // during de-serialization.
  mapWork.setPathToPartitionInfo(mapWork.getPathToPartitionInfo());
  mapWork.setPathToAliases(mapWork.getPathToAliases());
  return mapWork;
}
}
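// A registration sketch (a guess at the wiring, not the actual Hive code;
// MapWorkSerializer is assumed to be the FieldSerializer subclass that the
// read(...) override above belongs to): registering it makes Kryo run the
// interning setters on every de-serialized MapWork.
Kryo kryo = new Kryo();
kryo.register(MapWork.class, new MapWorkSerializer(kryo, MapWork.class));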
@Override
protected void setUp() throws IOException {
  conf = new Configuration();
  job = new JobConf(conf);

  TableDesc tblDesc = Utilities.defaultTd;
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Utilities.setMapRedWork(job, mrwork,
      new Path("/tmp/" + System.getProperty("user.name"), "hive"));

  fileSystem = FileSystem.getLocal(conf);
  testDir = new Path(System.getProperty("test.tmp.dir",
      System.getProperty("user.dir", new File(".").getAbsolutePath()))
      + "/TestSymlinkTextInputFormat");
  reporter = Reporter.NULL;
  fileSystem.delete(testDir, true);

  dataDir1 = new Path(testDir, "datadir1");
  dataDir2 = new Path(testDir, "datadir2");
  symlinkDir = new Path(testDir, "symlinkdir");
}
public void testAvoidSplitCombination() throws Exception {
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf);

  TableDesc tblDesc = Utilities.defaultTd;
  tblDesc.setInputFileFormatClass(TestSkipCombineInputFormat.class);
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder1"), partDesc);
  pt.put(new Path("/tmp/testfolder2"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Path mapWorkPath = new Path("/tmp/" + System.getProperty("user.name"), "hive");
  Utilities.setMapRedWork(conf, mrwork, mapWorkPath);

  try {
    Path[] paths = new Path[2];
    paths[0] = new Path("/tmp/testfolder1");
    paths[1] = new Path("/tmp/testfolder2");
    CombineHiveInputFormat combineInputFormat =
        ReflectionUtils.newInstance(CombineHiveInputFormat.class, conf);
    combineInputFormat.pathToPartitionInfo =
        Utilities.getMapWork(conf).getPathToPartitionInfo();
    Set results = combineInputFormat.getNonCombinablePathIndices(job, paths, 2);
    assertEquals("Should have both path indices in the results set", 2, results.size());
  } finally {
    // Clean up the mapwork path.
    FileSystem.get(conf).delete(mapWorkPath, true);
  }
}
mapWork1.setAliasToWork(new LinkedHashMap<>(
    ImmutableMap.of(alias1Name, (Operator<?>) mock(Operator.class))));
mapWork1.setPathToPartitionInfo(new LinkedHashMap<>(
    ImmutableMap.of(nonExistentPath1, mockPartitionDesc)));
mapWork2.setAliasToWork(new LinkedHashMap<>(
    ImmutableMap.of(alias2Name, (Operator<?>) mock(Operator.class))));
mapWork2.setPathToPartitionInfo(new LinkedHashMap<>(
    ImmutableMap.of(nonExistentPath2, mockPartitionDesc)));
mapWork.setPathToPartitionInfo(pathToPartitionInfo);
private void init() throws IOException {
  conf = new JobConf();
  resetIOContext();
  rcfReader = mock(RCFileRecordReader.class);
  when(rcfReader.next((LongWritable) anyObject(),
      (BytesRefArrayWritable) anyObject())).thenReturn(true);
  // Since the start is 0 and the length is 100, the first call to sync should be with the
  // value 50, so return that for getPos().
  when(rcfReader.getPos()).thenReturn(50L);
  conf.setBoolean("hive.input.format.sorted", true);

  TableDesc tblDesc = Utilities.defaultTd;
  PartitionDesc partDesc = new PartitionDesc(tblDesc, null);
  LinkedHashMap<Path, PartitionDesc> pt = new LinkedHashMap<>();
  pt.put(new Path("/tmp/testfolder"), partDesc);
  MapredWork mrwork = new MapredWork();
  mrwork.getMapWork().setPathToPartitionInfo(pt);
  Utilities.setMapRedWork(conf, mrwork,
      new Path("/tmp/" + System.getProperty("user.name"), "hive"));

  hiveSplit = new TestHiveInputSplit();
  hbsReader = new TestHiveRecordReader(rcfReader, conf);
  hbsReader.initIOContext(hiveSplit, conf, Class.class, rcfReader);
}
mapWork.setPathToPartitionInfo(partMap);
mrwork.getMapWork().setPathToPartitionInfo(pt);
mrwork.getMapWork().setAliasToWork(ao);
mrwork.getMapWork().setPathToPartitionInfo(pathToPartitionInfo);
mrwork.getMapWork().setAliasToWork(aliasToWork);
@SuppressWarnings("rawtypes") private static Path createDummyFileForEmptyTable(JobConf job, MapWork work, Path hiveScratchDir, String alias, int sequenceNumber) throws Exception { TableDesc tableDesc = work.getAliasToPartnInfo().get(alias).getTableDesc(); if (tableDesc.isNonNative()) { // if this isn't a hive table we can't create an empty file for it. return null; } Properties props = tableDesc.getProperties(); HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, tableDesc); Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, sequenceNumber, props, false); if (LOG.isInfoEnabled()) { LOG.info("Changed input file for alias " + alias + " to " + newPath); } // update the work LinkedHashMap<String, ArrayList<String>> pathToAliases = work.getPathToAliases(); ArrayList<String> newList = new ArrayList<String>(); newList.add(alias); pathToAliases.put(newPath.toUri().toString(), newList); work.setPathToAliases(pathToAliases); LinkedHashMap<String, PartitionDesc> pathToPartitionInfo = work.getPathToPartitionInfo(); PartitionDesc pDesc = work.getAliasToPartnInfo().get(alias).clone(); pathToPartitionInfo.put(newPath.toUri().toString(), pDesc); work.setPathToPartitionInfo(pathToPartitionInfo); return newPath; }
/**
 * Create a new plan and return it. The plan won't contain the name-to-split-sample
 * information from the parse context.
 *
 * @return the new plan
 */
public static MapredWork getMapRedWorkFromConf(HiveConf conf) {
  MapredWork mrWork = new MapredWork();
  MapWork work = mrWork.getMapWork();
  boolean mapperCannotSpanPartns = conf.getBoolVar(
      HiveConf.ConfVars.HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS);
  work.setMapperCannotSpanPartns(mapperCannotSpanPartns);
  work.setPathToAliases(new LinkedHashMap<String, ArrayList<String>>());
  work.setPathToPartitionInfo(new LinkedHashMap<String, PartitionDesc>());
  work.setAliasToWork(new LinkedHashMap<String, Operator<? extends OperatorDesc>>());
  work.setHadoopSupportsSplittable(
      conf.getBoolVar(HiveConf.ConfVars.HIVE_COMBINE_INPUT_FORMAT_SUPPORTS_SPLITTABLE));
  return mrWork;
}
pathToPartitionInfo.put(strNewPath, pathToPartitionInfo.get(strPath));
pathToPartitionInfo.remove(strPath);
work.setPathToPartitionInfo(pathToPartitionInfo);
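// A small helper sketch generalizing the rename pattern above (the method
// name is an assumption, not Hive API): move a partition entry from one
// path key to another, then write the map back so MapWork can re-intern it.
private static void movePartitionInfo(MapWork work, String oldKey, String newKey) {
  LinkedHashMap<String, PartitionDesc> info = work.getPathToPartitionInfo();
  PartitionDesc desc = info.remove(oldKey);
  if (desc != null) {
    info.put(newKey, desc);
  }
  work.setPathToPartitionInfo(info);
}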