/**
 * Feeds 1001 distinct numeric keys (each with an empty value list) into the
 * reducer with {@code REGION_NUMBER} set to "2" and verifies that exactly two
 * output records are produced, printing each output key for inspection.
 */
@Test
public void test() throws IOException {
    // Register every key directly; each gets an empty LongWritable value list.
    for (int i = 0; i < 1001; i++) {
        reduceDriver.addInput(new Text(String.valueOf(i)), new ArrayList<LongWritable>());
    }
    reduceDriver.getConfiguration().set(BatchConstants.REGION_NUMBER, "2");
    List<Pair<Text, LongWritable>> output = reduceDriver.run();
    assertEquals(2, output.size());
    for (Pair<Text, LongWritable> record : output) {
        System.out.println(record.getFirst());
    }
}
/**
 * Fluent variant of {@code addInput(KeyValueReuseList)}: registers the given
 * grouped input and returns this driver so calls can be chained.
 *
 * @param input grouped (key, values) input to feed to the Reducer
 * @return this driver, for chaining
 */
public ReduceDriver<K1, V1, K2, V2> withInput(final KeyValueReuseList<K1, V1> input) {
    addInput(input);
    return this;
}
/** * Adds input to send to the Reducer * * @param inputs * list of (K*, V*) pairs */ public void addAllElements(final List<KeyValueReuseList<K1, V1>> inputs) { // This method is called addAllElements to avoid erasure conflict with addAll method from ReduceDriverBase. for (KeyValueReuseList<K1, V1> input : inputs) { addInput(input); } }
/**
 * Validates driver state immediately before the Reducer runs, first converting
 * the legacy single-key ({@code inputKey}/{@code inputValues}) and pair-list
 * ({@code inputs}) representations into grouped inputs for backwards
 * compatibility.
 *
 * @param reducer the Reducer instance about to be run; must not be null
 * @throws IllegalStateException if no input or no Reducer was provided, or if
 *         this driver instance has already been run once
 */
@Override
protected void preRunChecks(Object reducer) {
    // Legacy API: a single key plus value list replaces any grouped input.
    if (inputKey != null && !getInputValues().isEmpty()) {
        clearInput();
        addInput(new ReduceFeeder<K1, V1>(getConfiguration()).updateInput(inputKey, getInputValues()));
    }
    // Legacy API: pair-list inputs replace the grouped inputs wholesale.
    // The field is reassigned here, so the original groupedInputs.clear()
    // before the assignment was redundant and has been dropped.
    if (inputs != null && !inputs.isEmpty()) {
        groupedInputs = new ReduceFeeder<K1, V1>(getConfiguration()).updateAll(inputs);
    }
    if (groupedInputs == null || groupedInputs.isEmpty()) {
        throw new IllegalStateException("No input was provided");
    }
    if (reducer == null) {
        throw new IllegalStateException("No Reducer class was provided");
    }
    if (driverReused()) {
        throw new IllegalStateException("Driver reuse not allowed");
    } else {
        setUsedOnceStatus();
    }
}
/**
 * End-to-end reducer test: clusters the synthetic hypercube data with
 * StreamingKMeans, feeds the intermediate centroids to the reducer under a
 * single key, and checks the reducer output against the accumulated
 * post-mapper weight.
 */
@Test
public void testHypercubeReducer() throws IOException {
    ReduceDriver<IntWritable, CentroidWritable, IntWritable, CentroidWritable> driver =
            ReduceDriver.newReduceDriver(new StreamingKMeansReducer());
    Configuration conf = driver.getConfiguration();
    configure(conf);
    System.out.printf("%s reducer test\n", conf.get(StreamingKMeansDriver.SEARCHER_CLASS_OPTION));

    // Target cluster count grows with dimensionality and (log of) data size.
    int numClusters = (1 << NUM_DIMENSIONS) * (int) Math.log(NUM_DATA_POINTS);
    StreamingKMeans clusterer = new StreamingKMeans(
            StreamingKMeansUtilsMR.searcherFromConfiguration(conf), numClusters, DISTANCE_CUTOFF);
    long startMillis = System.currentTimeMillis();
    clusterer.cluster(syntheticData.getFirst());
    long endMillis = System.currentTimeMillis();
    System.out.printf("%f [s]\n", (endMillis - startMillis) / 1000.0);

    // Collect the intermediate centroids and total their weights.
    int postMapperTotalWeight = 0;
    List<CentroidWritable> reducerInputs = Lists.newArrayList();
    for (Centroid centroid : clusterer) {
        reducerInputs.add(new CentroidWritable(centroid));
        postMapperTotalWeight += centroid.getWeight();
    }

    driver.addInput(new IntWritable(0), reducerInputs);
    List<org.apache.hadoop.mrunit.types.Pair<IntWritable, CentroidWritable>> results = driver.run();
    testReducerResults(postMapperTotalWeight, results);
}