/**
 * Estimates the in-memory size of the stripe being built: the tree
 * writer's own memory estimate plus the bytes buffered in every stream.
 *
 * @return approximate stripe size in bytes
 */
private long estimateStripeSize() {
  long total = treeWriter.estimateMemory();
  for (BufferedStream stream : streams.values()) {
    total += stream.getBufferSize();
  }
  return total;
}
removeIsPresentPositions(); writeStripeStatistics(stripeStatsBuilder, this); stripeStatsBuilders.add(stripeStatsBuilder); builder.addColumns(getEncoding()); builder.setWriterTimezone(TimeZone.getDefault().getID()); if (rowIndexStream != null) {
@Override void write(Object obj) throws IOException { super.write(obj); if (obj != null) { Text val = getTextValue(obj); if (useDictionaryEncoding || !strideDictionaryCheck) { rows.add(dictionary.add(val)); } else { // write data and length directStreamOutput.write(val.getBytes(), 0, val.getLength()); directLengthOutput.write(val.getLength()); } indexStatistics.updateString(val); if (createBloomFilter) { bloomFilter.addBytes(val.getBytes(), val.getLength()); } } }
removeIsPresentPositions(); writeStripeStatistics(stripeStatsBuilder, this); stripeStatsBuilders.add(stripeStatsBuilder); builder.addColumns(getEncoding()); builder.setWriterTimezone(TimeZone.getDefault().getID()); if (rowIndexStream != null) {
@Override void write(Object obj) throws IOException { super.write(obj); if (obj != null) { long val; if (intInspector != null) { val = intInspector.get(obj); } else if (longInspector != null) { val = longInspector.get(obj); } else { val = shortInspector.get(obj); } indexStatistics.updateInteger(val); if (createBloomFilter) { // integers are converted to longs in column statistics and during SARG evaluation bloomFilter.addLong(val); } writer.write(val); } }
public void addRowBatch(VectorizedRowBatch batch) throws IOException { if (buildIndex) { // Batch the writes up to the rowIndexStride so that we can get the // right size indexes. int posn = 0; while (posn < batch.size) { int chunkSize = Math.min(batch.size - posn, rowIndexStride - rowsInIndex); treeWriter.writeRootBatch(batch, posn, chunkSize); posn += chunkSize; rowsInIndex += chunkSize; rowsInStripe += chunkSize; if (rowsInIndex >= rowIndexStride) { createRowIndexEntry(); } } } else { rowsInStripe += batch.size; treeWriter.writeRootBatch(batch, 0, batch.size); } memoryManager.addedRow(); }
@Override void write(Object obj) throws IOException { super.write(obj); if (obj != null) { MapObjectInspector insp = (MapObjectInspector) inspector; // this sucks, but it will have to do until we can get a better // accessor in the MapObjectInspector. Map<?, ?> valueMap = insp.getMap(obj); lengths.write(valueMap.size()); if (createBloomFilter) { bloomFilter.addLong(valueMap.size()); } for (Map.Entry<?, ?> entry : valueMap.entrySet()) { childrenWriters[0].write(entry.getKey()); childrenWriters[1].write(entry.getValue()); } } }
/**
 * Create a row index entry with the previous location and the current
 * index statistics. Also merges the index statistics into the file
 * statistics before they are cleared. Finally, it records the start of the
 * next index and ensures all of the children columns also create an entry.
 *
 * @throws IOException if an error occurs during create
 */
void createRowIndexEntry() throws IOException {
  // Order matters: fold the row-group stats into the stripe stats and
  // serialize them into the entry BEFORE resetting them for the next group.
  stripeColStatistics.merge(indexStatistics);
  rowIndexEntry.setStatistics(indexStatistics.serialize());
  indexStatistics.reset();
  rowIndex.addEntry(rowIndexEntry);
  rowIndexEntry.clear();
  addBloomFilterEntry();
  // Capture the stream positions where the next row group starts.
  recordPosition(rowIndexPosition);
  // Recurse so every child column emits its own entry for this row group.
  for (TreeWriter child : childrenWriters) {
    child.createRowIndexEntry();
  }
}
@Override void write(Object obj) throws IOException { super.write(obj); if (obj != null) { Text val = getTextValue(obj); if (useDictionaryEncoding || !strideDictionaryCheck) { rows.add(dictionary.add(val)); } else { // write data and length directStreamOutput.write(val.getBytes(), 0, val.getLength()); directLengthOutput.write(val.getLength()); } indexStatistics.updateString(val); if (createBloomFilter) { bloomFilter.addBytes(val.getBytes(), val.getLength()); } } }
/**
 * Writes one decimal value: the unscaled BigInteger to the value stream
 * and the scale to the scale stream, then updates statistics and the
 * bloom filter.
 *
 * @param obj the value to write, possibly null
 * @throws IOException if an error occurs during write
 */
@Override
void write(Object obj) throws IOException {
  super.write(obj);
  if (obj != null) {
    HiveDecimal decimal = ((HiveDecimalObjectInspector) inspector).getPrimitiveJavaObject(obj);
    // NOTE(review): when the inspector returns null here, this early return
    // skips the value/scale streams and statistics even though super.write()
    // already saw a non-null object — confirm the isPresent stream stays
    // consistent with the value streams in that case.
    if (decimal == null) {
      return;
    }
    SerializationUtils.writeBigInteger(valueStream, decimal.unscaledValue());
    scaleStream.write(decimal.scale());
    indexStatistics.updateDecimal(decimal);
    if (createBloomFilter) {
      // Decimals are added to the bloom filter by their string form.
      bloomFilter.addString(decimal.toString());
    }
  }
}
/**
 * Writes one list value: the element count goes to the length stream
 * (and bloom filter), then every element is written by the single child
 * writer.
 *
 * @param obj the value to write, possibly null
 * @throws IOException if an error occurs during write
 */
@Override
void write(Object obj) throws IOException {
  super.write(obj);
  if (obj == null) {
    return;
  }
  ListObjectInspector listInspector = (ListObjectInspector) inspector;
  int size = listInspector.getListLength(obj);
  lengths.write(size);
  if (createBloomFilter) {
    bloomFilter.addLong(size);
  }
  for (int i = 0; i < size; ++i) {
    childrenWriters[0].write(listInspector.getListElement(obj, i));
  }
}
/**
 * Writes a single row, creating a row index entry whenever the stride is
 * reached. The write itself is serialized on this writer; the memory
 * manager callback happens outside the lock.
 *
 * @param row the row to write
 * @throws IOException if an error occurs during write
 */
@Override
public void addRow(Object row) throws IOException {
  synchronized (this) {
    treeWriter.write(row);
    ++rowsInStripe;
    if (buildIndex && ++rowsInIndex >= rowIndexStride) {
      createRowIndexEntry();
    }
  }
  memoryManager.addedRow();
}
/**
 * Handle the top level object write.
 *
 * This default method is used for all types except structs, which are the
 * typical case. VectorizedRowBatch assumes the top level object is a
 * struct, so we use the first column for all other types.
 * @param batch the batch to write from
 * @param offset the row to start on
 * @param length the number of rows to write
 * @throws IOException if an error occurs during write
 */
void writeRootBatch(VectorizedRowBatch batch, int offset, int length) throws IOException {
  // Non-struct roots live in column 0 of the batch.
  writeBatch(batch.cols[0], offset, length);
}
/**
 * Writes one timestamp value: seconds relative to the base timestamp go
 * to one stream, the formatted nanoseconds to another. Statistics and
 * the bloom filter record the full value.
 *
 * @param obj the value to write, possibly null
 * @throws IOException if an error occurs during write
 */
@Override
void write(Object obj) throws IOException {
  super.write(obj);
  if (obj == null) {
    return;
  }
  Timestamp ts = ((TimestampObjectInspector) inspector).getPrimitiveJavaObject(obj);
  indexStatistics.updateTimestamp(ts);
  long millis = ts.getTime();
  seconds.write(millis / MILLIS_PER_SECOND - base_timestamp);
  nanos.write(formatNanos(ts.getNanos()));
  if (createBloomFilter) {
    bloomFilter.addLong(millis);
  }
}
/**
 * Writes one binary value: the raw bytes to the data stream and the byte
 * count to the length stream, then updates statistics and the bloom
 * filter.
 *
 * @param obj the value to write, possibly null
 * @throws IOException if an error occurs during write
 */
@Override
void write(Object obj) throws IOException {
  super.write(obj);
  if (obj == null) {
    return;
  }
  BytesWritable bytes = ((BinaryObjectInspector) inspector).getPrimitiveWritableObject(obj);
  int len = bytes.getLength();
  stream.write(bytes.getBytes(), 0, len);
  length.write(len);
  indexStatistics.updateBinary(bytes);
  if (createBloomFilter) {
    bloomFilter.addBytes(bytes.getBytes(), len);
  }
}
/**
 * Closes out the current row group: asks the column tree to emit its
 * index entries, then resets the per-group row counter.
 *
 * @throws IOException if an error occurs during create
 */
private void createRowIndexEntry() throws IOException {
  treeWriter.createRowIndexEntry();
  rowsInIndex = 0;
}
/**
 * Writes one union value: the tag goes to the tag stream (and bloom
 * filter), and the contained value is dispatched to the child writer the
 * tag selects.
 *
 * @param obj the value to write, possibly null
 * @throws IOException if an error occurs during write
 */
@Override
void write(Object obj) throws IOException {
  super.write(obj);
  if (obj == null) {
    return;
  }
  UnionObjectInspector unionInspector = (UnionObjectInspector) inspector;
  byte tag = unionInspector.getTag(obj);
  tags.write(tag);
  if (createBloomFilter) {
    bloomFilter.addLong(tag);
  }
  childrenWriters[tag].write(unionInspector.getField(obj));
}
@Override void write(Object obj) throws IOException { super.write(obj); if (obj != null) { float val = ((FloatObjectInspector) inspector).get(obj); indexStatistics.updateDouble(val); if (createBloomFilter) { // floats are converted to doubles in column statistics and during SARG evaluation bloomFilter.addDouble(val); } utils.writeFloat(stream, val); } }
@Override void write(Object obj) throws IOException { super.write(obj); if (obj != null) { // Using the Writable here as it's used directly for writing as well as for stats. DateWritable val = ((DateObjectInspector) inspector).getPrimitiveWritableObject(obj); indexStatistics.updateDate(val); writer.write(val.getDays()); if (createBloomFilter) { bloomFilter.addLong(val.getDays()); } } }
/**
 * Records the current positions of this column's streams so a row group
 * can later be located: parent positions first, then the data stream,
 * then the length stream.
 *
 * @param recorder receives the stream positions
 * @throws IOException if an error occurs while recording
 */
@Override
void recordPosition(PositionRecorder recorder) throws IOException {
  super.recordPosition(recorder);
  stream.getPosition(recorder);
  length.getPosition(recorder);
}
}