protected AbstractGroupCollectionAggregationState(PageBuilder pageBuilder)
{
    // Per-group singly linked lists of (blockIndex, position) entries.
    // NULL is the sentinel default for "no entry".
    this.headBlockIndex = new ShortBigArray(NULL);
    this.headPosition = new IntBigArray(NULL);
    this.nextBlockIndex = new ShortBigArray(NULL);
    this.nextPosition = new IntBigArray(NULL);
    this.tailBlockIndex = new ShortBigArray(NULL);
    this.tailPosition = new IntBigArray(NULL);

    // Values live in a list of page builders; sumPositions[i] is the number of
    // positions in all builders before builder i (prefix sum for addressing).
    this.currentPageBuilder = pageBuilder;
    this.values = new ArrayList<>();
    this.sumPositions = new LongArrayList();
    this.groupEntryCount = new IntBigArray();
    values.add(currentPageBuilder);
    sumPositions.add(0L);

    valueBlocksRetainedSizeInBytes = 0;
    totalPositions = 0;

    // Pre-size the per-position pointer arrays.
    capacity = 1024;
    nextBlockIndex.ensureCapacity(capacity);
    nextPosition.ensureCapacity(capacity);
    groupEntryCount.ensureCapacity(capacity);
}
// NOTE(review): fragment — this is the tail of an anonymous iterator whose
// declaration starts outside this view; kept verbatim.
@Override
public int nextInt()
{
    // return the current element and advance the cursor
    return groupIds.get(position++);
}
};
private void rehash(int size) { int newHashSize = arraySize(size + 1, FILL_RATIO); hashMask = newHashSize - 1; maxFill = calculateMaxFill(newHashSize); blockPositionByHash.ensureCapacity(newHashSize); blockPositionByHash.fill(EMPTY_SLOT); // the first element of elementBlock is always null for (int blockPosition = 1; blockPosition < elementBlock.getPositionCount(); blockPosition++) { blockPositionByHash.set(getHashPositionOfElement(elementBlock, blockPosition), blockPosition); } }
private void rehash() { long newCapacityLong = hashCapacity * 2L; if (newCapacityLong > Integer.MAX_VALUE) { throw new PrestoException(GENERIC_INSUFFICIENT_RESOURCES, "Size of hash table cannot exceed 1 billion entries"); } int newCapacity = (int) newCapacityLong; int newMask = newCapacity - 1; IntBigArray newHashPositions = new IntBigArray(-1); newHashPositions.ensureCapacity(newCapacity); for (int i = 0; i < values.getPositionCount(); i++) { // find an empty slot for the address int hashPosition = getBucketId(TypeUtils.hashPosition(type, values, i), newMask); while (newHashPositions.get(hashPosition) != -1) { hashPosition = (hashPosition + 1) & newMask; } // record the mapping newHashPositions.set(hashPosition, i); } hashCapacity = newCapacity; mask = newMask; maxFill = calculateMaxFill(newCapacity); hashPositions = newHashPositions; this.counts.ensureCapacity(maxFill); }
// NOTE(review): fragment — the enclosing method's signature and control flow
// are outside this view; the head-update and tail-link statements below
// presumably belong to different branches of the full method — confirm
// against the complete source. Kept verbatim.
// Grow the per-position pointer arrays by 1.5x.
capacity *= 1.5;
nextBlockIndex.ensureCapacity(capacity);
nextPosition.ensureCapacity(capacity);
// point the group's head at the newly inserted value
headPosition.set(currentGroupId, insertedPosition);
// link the previous tail entry to the new one
long absoluteTailAddress = toAbsolutePosition(tailBlockIndex.get(currentGroupId), tailPosition.get(currentGroupId));
nextBlockIndex.set(absoluteTailAddress, insertedBlockIndex);
nextPosition.set(absoluteTailAddress, insertedPosition);
// the new entry becomes the tail; NOTE(review): tailBlockIndex is not updated
// here — presumably handled elsewhere in the full method, verify
tailPosition.set(currentGroupId, insertedPosition);
groupEntryCount.increment(currentGroupId);
currentPageBuilder.declarePosition();
totalPositions++;
private IntIterator hashSortedGroupIds()
{
    int groupCount = groupByHash.getGroupCount();

    // Fill [0, groupCount) with the identity mapping, then order group ids
    // by their raw hash so iteration follows hash order.
    IntBigArray sorted = new IntBigArray();
    sorted.ensureCapacity(groupCount);
    for (int groupId = 0; groupId < groupCount; groupId++) {
        sorted.set(groupId, groupId);
    }
    sorted.sort(0, groupCount, (left, right) ->
            Long.compare(groupByHash.getRawHash(left), groupByHash.getRawHash(right)));

    // Lazily walk the sorted ids; groupCount is captured from the enclosing scope.
    return new AbstractIntIterator()
    {
        private int position;

        @Override
        public boolean hasNext()
        {
            return position < groupCount;
        }

        @Override
        public int nextInt()
        {
            return sorted.get(position++);
        }
    };
}
/**
 * Inserts a brand-new (group, value) node into the currently empty bucket.
 * Stores the value, wires the node into the bucket, and pushes it onto the
 * head of the group's linked list of nodes.
 */
private void addNewGroup(long groupId, Block block, int position, long count)
{
    checkState(isEmpty(), "bucket %s not empty, points to %s", bucketId, buckets.get(bucketId));

    // we've already computed the hash for this value; ValueStore will save it for future use
    int nextValuePosition = valueStore.addAndGetPosition(type, block, position, valueHash);
    // set value pointer to hash map of values
    valuePositions.set(nodePointerToUse, nextValuePosition);
    // save hashes for future rehashing
    valueAndGroupHashes.set(nodePointerToUse, valueAndGroupHash);
    // set pointer to node for this bucket
    buckets.set(bucketId, nodePointerToUse);
    // save data for this node
    counts.set(nodePointerToUse, count);
    // used for doing value comparisons on hash collisions
    groupIds.set(nodePointerToUse, groupId);

    // we only ever store ints as values; we need long as an index
    int currentHead = (int) headPointers.get(groupId);
    // maintain linked list of nodes in this group (insert at head)
    headPointers.set(groupId, nodePointerToUse);
    nextPointers.set(nodePointerToUse, currentHead);
}
// NOTE(review): closing brace of the enclosing class, kept verbatim.
}
public long getEstimatedSize()
{
    // retained bytes of the bucket array plus the shallow instance size
    return buckets.sizeOf() + INSTANCE_SIZE;
}
@Override public void writeBlock(Block block) { checkState(!closed); checkArgument(block.getPositionCount() > 0, "Block is empty"); if (directEncoded) { directColumnWriter.writeBlock(block); return; } // record values values.ensureCapacity(rowGroupValueCount + block.getPositionCount()); for (int position = 0; position < block.getPositionCount(); position++) { int index = dictionary.putIfAbsent(block, position); values.set(rowGroupValueCount, index); rowGroupValueCount++; totalValueCount++; if (!block.isNull(position)) { // todo min/max statistics only need to be updated if value was not already in the dictionary, but non-null count does statisticsBuilder.addValue(type.getSlice(block, position)); rawBytes += block.getSliceLength(position); totalNonNullValueCount++; } } }
/**
 * Records the partition count for the current group.
 */
@Override
public void setPartitionCount(int partitionCount)
{
    // groupId is instance state set elsewhere — presumably the group being
    // accumulated; confirm against the enclosing class
    this.partitionCounts.set(groupId, partitionCount);
}
@Override
public void ensureCapacity(long size)
{
    // grow every per-group array in lock step so all can be indexed by group id
    groupEntryCount.ensureCapacity(size);
    headBlockIndex.ensureCapacity(size);
    headPosition.ensureCapacity(size);
    tailBlockIndex.ensureCapacity(size);
    tailPosition.ensureCapacity(size);
}
public DictionaryBuilder(int expectedSize) { checkArgument(expectedSize >= 0, "expectedSize must not be negative"); // todo we can do better int expectedEntries = min(expectedSize, DEFAULT_MAX_PAGE_SIZE_IN_BYTES / EXPECTED_BYTES_PER_ENTRY); // it is guaranteed expectedEntries * EXPECTED_BYTES_PER_ENTRY will not overflow this.elementBlock = new VariableWidthBlockBuilder( null, expectedEntries, expectedEntries * EXPECTED_BYTES_PER_ENTRY); // first position is always null this.elementBlock.appendNull(); int hashSize = arraySize(expectedSize, FILL_RATIO); this.maxFill = calculateMaxFill(hashSize); this.hashMask = hashSize - 1; blockPositionByHash.ensureCapacity(hashSize); blockPositionByHash.fill(EMPTY_SLOT); this.containsNullElement = false; }
@Override
public Map<Integer, ColumnStatistics> finishRowGroup()
{
    checkState(!closed);
    checkState(inRowGroup);
    inRowGroup = false;

    // direct-encoded writers manage their own row groups
    if (directEncoded) {
        return directColumnWriter.finishRowGroup();
    }

    // snapshot this row group's indexes and statistics
    ColumnStatistics statistics = statisticsBuilder.buildColumnStatistics();
    rowGroups.add(new DictionaryRowGroup(values, rowGroupValueCount, statistics));

    // reset the per-row-group accumulators for the next row group
    rowGroupValueCount = 0;
    statisticsBuilder = newStringStatisticsBuilder();
    values = new IntBigArray();

    return ImmutableMap.of(column, statistics);
}
/**
 * Creates a new big array with one allocated segment whose slots are
 * pre-filled with the specified default value.
 */
public IntBigArray(int initialValue)
{
    this.initialValue = initialValue;
    this.array = new int[INITIAL_SEGMENTS][];
    allocateNewSegment();
}
public void clear() { containsNullElement = false; blockPositionByHash.fill(EMPTY_SLOT); elementBlock = elementBlock.newBlockBuilderLike(null); // first position is always null elementBlock.appendNull(); }
/**
 * Ensures this big array holds at least {@code length} elements. If the array
 * is smaller, segments are added until it is at least the specified length.
 */
public void ensureCapacity(long length)
{
    // Fixed boundary: a capacity exactly equal to the requested length is
    // already sufficient. The previous check (capacity > length) fell through
    // to a redundant grow(length) call when capacity == length.
    if (capacity >= length) {
        return;
    }
    grow(length);
}
// NOTE(review): fragment — the method's opening brace and the remainder of
// its body lie outside this view; tokens kept verbatim.
private boolean writeDictionaryRowGroup(Block dictionary, int valueCount, IntBigArray dictionaryIndexes, int maxDirectBytes)
    // walk the backing segments directly — presumably to process indexes in
    // bulk without per-element big-array addressing; confirm in full method
    int[][] segments = dictionaryIndexes.getSegments();
    for (int i = 0; valueCount > 0 && i < segments.length; i++) {
        int[] segment = segments[i];
private void rehash() { long newCapacityLong = hashCapacity * 2L; if (newCapacityLong > Integer.MAX_VALUE) { throw new PrestoException(GENERIC_INSUFFICIENT_RESOURCES, "Size of hash table cannot exceed 1 billion entries"); } int newCapacity = (int) newCapacityLong; int newMask = newCapacity - 1; IntBigArray newHashPositions = new IntBigArray(-1); newHashPositions.ensureCapacity(newCapacity); for (int i = 0; i < values.getPositionCount(); i++) { // find an empty slot for the address int hashPosition = getBucketId(TypeUtils.hashPosition(type, values, i), newMask); while (newHashPositions.get(hashPosition) != -1) { hashPosition = (hashPosition + 1) & newMask; } // record the mapping newHashPositions.set(hashPosition, i); } hashCapacity = newCapacity; mask = newMask; maxFill = calculateMaxFill(newCapacity); hashPositions = newHashPositions; this.counts.ensureCapacity(maxFill); }
// NOTE(review): fragment — the enclosing method's signature and control flow
// are outside this view; the head-update and tail-link statements below
// presumably belong to different branches of the full method — confirm
// against the complete source. Kept verbatim.
// Grow the per-position pointer arrays by 1.5x.
capacity *= 1.5;
nextBlockIndex.ensureCapacity(capacity);
nextPosition.ensureCapacity(capacity);
// point the group's head at the newly inserted value
headPosition.set(currentGroupId, insertedPosition);
// link the previous tail entry to the new one
long absoluteTailAddress = toAbsolutePosition(tailBlockIndex.get(currentGroupId), tailPosition.get(currentGroupId));
nextBlockIndex.set(absoluteTailAddress, insertedBlockIndex);
nextPosition.set(absoluteTailAddress, insertedPosition);
// the new entry becomes the tail; NOTE(review): tailBlockIndex is not updated
// here — presumably handled elsewhere in the full method, verify
tailPosition.set(currentGroupId, insertedPosition);
groupEntryCount.increment(currentGroupId);
currentPageBuilder.declarePosition();
totalPositions++;
private IntIterator hashSortedGroupIds()
{
    int count = groupByHash.getGroupCount();

    // Identity-fill then sort group ids by raw hash so callers iterate the
    // groups in hash order.
    IntBigArray ordered = new IntBigArray();
    ordered.ensureCapacity(count);
    for (int id = 0; id < count; id++) {
        ordered.set(id, id);
    }
    ordered.sort(0, count, (a, b) ->
            Long.compare(groupByHash.getRawHash(a), groupByHash.getRawHash(b)));

    // Expose the result as a primitive iterator; `count` is captured.
    return new AbstractIntIterator()
    {
        private int cursor;

        @Override
        public boolean hasNext()
        {
            return cursor < count;
        }

        @Override
        public int nextInt()
        {
            return ordered.get(cursor++);
        }
    };
}