/**
 * Estimates the in-memory size of the stripe being built: the tree
 * writer's own memory estimate plus the bytes buffered in every stream.
 *
 * @return approximate stripe size in bytes
 */
private long estimateStripeSize() {
  long total = treeWriter.estimateMemory();
  for (BufferedStream stream : streams.values()) {
    total += stream.getBufferSize();
  }
  return total;
}
removeIsPresentPositions(); writeStripeStatistics(stripeStatsBuilder, this); stripeStatsBuilders.add(stripeStatsBuilder); builder.addColumns(getEncoding()); builder.setWriterTimezone(TimeZone.getDefault().getID()); if (rowIndexStream != null) {
@Override void write(Object obj) throws IOException { super.write(obj); if (obj != null) { Text val = getTextValue(obj); if (useDictionaryEncoding || !strideDictionaryCheck) { rows.add(dictionary.add(val)); } else { // write data and length directStreamOutput.write(val.getBytes(), 0, val.getLength()); directLengthOutput.write(val.getLength()); } indexStatistics.updateString(val); if (createBloomFilter) { bloomFilter.addBytes(val.getBytes(), val.getLength()); } } }
removeIsPresentPositions(); writeStripeStatistics(stripeStatsBuilder, this); stripeStatsBuilders.add(stripeStatsBuilder); builder.addColumns(getEncoding()); builder.setWriterTimezone(TimeZone.getDefault().getID()); if (rowIndexStream != null) {
@Override void write(Object obj) throws IOException { super.write(obj); if (obj != null) { long val; if (intInspector != null) { val = intInspector.get(obj); } else if (longInspector != null) { val = longInspector.get(obj); } else { val = shortInspector.get(obj); } indexStatistics.updateInteger(val); if (createBloomFilter) { // integers are converted to longs in column statistics and during SARG evaluation bloomFilter.addLong(val); } writer.write(val); } }
public void addRowBatch(VectorizedRowBatch batch) throws IOException { if (buildIndex) { // Batch the writes up to the rowIndexStride so that we can get the // right size indexes. int posn = 0; while (posn < batch.size) { int chunkSize = Math.min(batch.size - posn, rowIndexStride - rowsInIndex); treeWriter.writeRootBatch(batch, posn, chunkSize); posn += chunkSize; rowsInIndex += chunkSize; rowsInStripe += chunkSize; if (rowsInIndex >= rowIndexStride) { createRowIndexEntry(); } } } else { rowsInStripe += batch.size; treeWriter.writeRootBatch(batch, 0, batch.size); } memoryManager.addedRow(); }
@Override void write(Object obj) throws IOException { super.write(obj); if (obj != null) { MapObjectInspector insp = (MapObjectInspector) inspector; // this sucks, but it will have to do until we can get a better // accessor in the MapObjectInspector. Map<?, ?> valueMap = insp.getMap(obj); lengths.write(valueMap.size()); if (createBloomFilter) { bloomFilter.addLong(valueMap.size()); } for (Map.Entry<?, ?> entry : valueMap.entrySet()) { childrenWriters[0].write(entry.getKey()); childrenWriters[1].write(entry.getValue()); } } }
/**
 * Create a row index entry with the previous location and the current
 * index statistics. Also merges the index statistics into the file
 * statistics before they are cleared. Finally, it records the start of the
 * next index and ensures all of the children columns also create an entry.
 *
 * @throws IOException if an error occurs during create
 */
void createRowIndexEntry() throws IOException {
  // Order matters: fold the row-group stats into the stripe stats and
  // serialize them into the entry BEFORE resetting them for the next group.
  stripeColStatistics.merge(indexStatistics);
  rowIndexEntry.setStatistics(indexStatistics.serialize());
  indexStatistics.reset();
  rowIndex.addEntry(rowIndexEntry);
  rowIndexEntry.clear();
  addBloomFilterEntry();
  // Capture the stream positions where the next row group starts.
  recordPosition(rowIndexPosition);
  // Recurse so every child column emits its own entry for this row group.
  for (TreeWriter child : childrenWriters) {
    child.createRowIndexEntry();
  }
}
@Override void write(Object obj) throws IOException { super.write(obj); if (obj != null) { Text val = getTextValue(obj); if (useDictionaryEncoding || !strideDictionaryCheck) { rows.add(dictionary.add(val)); } else { // write data and length directStreamOutput.write(val.getBytes(), 0, val.getLength()); directLengthOutput.write(val.getLength()); } indexStatistics.updateString(val); if (createBloomFilter) { bloomFilter.addBytes(val.getBytes(), val.getLength()); } } }
/**
 * Writes one decimal value: the unscaled BigInteger to the value stream
 * and the scale to the scale stream, then updates statistics and the
 * bloom filter.
 *
 * @param obj the value to write, possibly null
 * @throws IOException if an error occurs during write
 */
@Override
void write(Object obj) throws IOException {
  super.write(obj);
  if (obj != null) {
    HiveDecimal decimal = ((HiveDecimalObjectInspector) inspector).getPrimitiveJavaObject(obj);
    // NOTE(review): when the inspector returns null here, this early return
    // skips the value/scale streams and statistics even though super.write()
    // already saw a non-null object — confirm the isPresent stream stays
    // consistent with the value streams in that case.
    if (decimal == null) {
      return;
    }
    SerializationUtils.writeBigInteger(valueStream, decimal.unscaledValue());
    scaleStream.write(decimal.scale());
    indexStatistics.updateDecimal(decimal);
    if (createBloomFilter) {
      // Decimals are added to the bloom filter by their string form.
      bloomFilter.addString(decimal.toString());
    }
  }
}
/**
 * Writes one list value: the element count goes to the length stream
 * (and bloom filter), then every element is written by the single child
 * writer.
 *
 * @param obj the value to write, possibly null
 * @throws IOException if an error occurs during write
 */
@Override
void write(Object obj) throws IOException {
  super.write(obj);
  if (obj == null) {
    return;
  }
  ListObjectInspector listInspector = (ListObjectInspector) inspector;
  int size = listInspector.getListLength(obj);
  lengths.write(size);
  if (createBloomFilter) {
    bloomFilter.addLong(size);
  }
  for (int i = 0; i < size; ++i) {
    childrenWriters[0].write(listInspector.getListElement(obj, i));
  }
}
/**
 * Writes a single row, creating a row index entry whenever the stride is
 * reached. The write itself is serialized on this writer; the memory
 * manager callback happens outside the lock.
 *
 * @param row the row to write
 * @throws IOException if an error occurs during write
 */
@Override
public void addRow(Object row) throws IOException {
  synchronized (this) {
    treeWriter.write(row);
    ++rowsInStripe;
    if (buildIndex && ++rowsInIndex >= rowIndexStride) {
      createRowIndexEntry();
    }
  }
  memoryManager.addedRow();
}
/**
 * Handle the top level object write.
 *
 * This default method is used for all types except structs, which are the
 * typical case. VectorizedRowBatch assumes the top level object is a
 * struct, so we use the first column for all other types.
 * @param batch the batch to write from
 * @param offset the row to start on
 * @param length the number of rows to write
 * @throws IOException if an error occurs during write
 */
void writeRootBatch(VectorizedRowBatch batch, int offset, int length) throws IOException {
  // Non-struct roots live in column 0 of the batch.
  writeBatch(batch.cols[0], offset, length);
}
/**
 * Writes one timestamp value: seconds relative to the base timestamp go
 * to one stream, the formatted nanoseconds to another. Statistics and
 * the bloom filter record the full value.
 *
 * @param obj the value to write, possibly null
 * @throws IOException if an error occurs during write
 */
@Override
void write(Object obj) throws IOException {
  super.write(obj);
  if (obj == null) {
    return;
  }
  Timestamp ts = ((TimestampObjectInspector) inspector).getPrimitiveJavaObject(obj);
  indexStatistics.updateTimestamp(ts);
  long millis = ts.getTime();
  seconds.write(millis / MILLIS_PER_SECOND - base_timestamp);
  nanos.write(formatNanos(ts.getNanos()));
  if (createBloomFilter) {
    bloomFilter.addLong(millis);
  }
}
/**
 * Writes one binary value: the raw bytes to the data stream and the byte
 * count to the length stream, then updates statistics and the bloom
 * filter.
 *
 * @param obj the value to write, possibly null
 * @throws IOException if an error occurs during write
 */
@Override
void write(Object obj) throws IOException {
  super.write(obj);
  if (obj == null) {
    return;
  }
  BytesWritable bytes = ((BinaryObjectInspector) inspector).getPrimitiveWritableObject(obj);
  int len = bytes.getLength();
  stream.write(bytes.getBytes(), 0, len);
  length.write(len);
  indexStatistics.updateBinary(bytes);
  if (createBloomFilter) {
    bloomFilter.addBytes(bytes.getBytes(), len);
  }
}
/**
 * Closes out the current row group: asks the column tree to emit its
 * index entries, then resets the per-group row counter.
 *
 * @throws IOException if an error occurs during create
 */
private void createRowIndexEntry() throws IOException {
  treeWriter.createRowIndexEntry();
  rowsInIndex = 0;
}
/**
 * Writes one union value: the tag goes to the tag stream (and bloom
 * filter), and the contained value is dispatched to the child writer the
 * tag selects.
 *
 * @param obj the value to write, possibly null
 * @throws IOException if an error occurs during write
 */
@Override
void write(Object obj) throws IOException {
  super.write(obj);
  if (obj == null) {
    return;
  }
  UnionObjectInspector unionInspector = (UnionObjectInspector) inspector;
  byte tag = unionInspector.getTag(obj);
  tags.write(tag);
  if (createBloomFilter) {
    bloomFilter.addLong(tag);
  }
  childrenWriters[tag].write(unionInspector.getField(obj));
}
@Override void write(Object obj) throws IOException { super.write(obj); if (obj != null) { float val = ((FloatObjectInspector) inspector).get(obj); indexStatistics.updateDouble(val); if (createBloomFilter) { // floats are converted to doubles in column statistics and during SARG evaluation bloomFilter.addDouble(val); } utils.writeFloat(stream, val); } }
@Override void write(Object obj) throws IOException { super.write(obj); if (obj != null) { // Using the Writable here as it's used directly for writing as well as for stats. DateWritable val = ((DateObjectInspector) inspector).getPrimitiveWritableObject(obj); indexStatistics.updateDate(val); writer.write(val.getDays()); if (createBloomFilter) { bloomFilter.addLong(val.getDays()); } } }
/**
 * Records the current positions of this column's streams so a row group
 * can later be located: parent positions first, then the data stream,
 * then the length stream.
 *
 * @param recorder receives the stream positions
 * @throws IOException if an error occurs while recording
 */
@Override
void recordPosition(PositionRecorder recorder) throws IOException {
  super.recordPosition(recorder);
  stream.getPosition(recorder);
  length.getPosition(recorder);
}
}