// Records the partition count for the current group.
// NOTE(review): writes at field `groupId`, not a parameter — presumably selected by an
// earlier call on this accumulator; confirm against the enclosing class.
@Override
public void setPartitionCount(int partitionCount)
{
    this.partitionCounts.set(groupId, partitionCount);
}
/**
 * Creates a brand-new node for the value at {@code block[position]} belonging to
 * {@code groupId}, stores its count, points the current bucket at it, and pushes it
 * onto the head of the group's linked list of nodes.
 *
 * NOTE(review): relies on fields populated by the caller before invocation —
 * {@code bucketId}, {@code valueHash}, {@code valueAndGroupHash} and
 * {@code nodePointerToUse}; confirm against the enclosing class.
 */
private void addNewGroup(long groupId, Block block, int position, long count)
{
    // the caller must have located an empty bucket for this node
    checkState(isEmpty(), "bucket %s not empty, points to %s", bucketId, buckets.get(bucketId));
    // the value hash was already computed for the value; ValueStore will save it for future use
    int nextValuePosition = valueStore.addAndGetPosition(type, block, position, valueHash);
    // set value pointer to hash map of values
    valuePositions.set(nodePointerToUse, nextValuePosition);
    // save hashes for future rehashing
    valueAndGroupHashes.set(nodePointerToUse, valueAndGroupHash);
    // set pointer to node for this bucket
    buckets.set(bucketId, nodePointerToUse);
    // save data for this node
    counts.set(nodePointerToUse, count);
    // used for doing value comparisons on hash collisions
    groupIds.set(nodePointerToUse, groupId);
    // we only ever store ints as values; we need long as an index
    int currentHead = (int) headPointers.get(groupId);
    // maintain linked list of nodes in this group (insert at head)
    headPointers.set(groupId, nodePointerToUse);
    nextPointers.set(nodePointerToUse, currentHead);
}
}
private void addNewGroup(long groupId, Block block, int position, long count) { checkState(isEmpty(), "bucket %s not empty, points to %s", bucketId, buckets.get(bucketId)); // we've already computed the value hash for only the value only; ValueStore will save it for future use int nextValuePosition = valueStore.addAndGetPosition(type, block, position, valueHash); // set value pointer to hash map of values valuePositions.set(nodePointerToUse, nextValuePosition); // save hashes for future rehashing valueAndGroupHashes.set(nodePointerToUse, valueAndGroupHash); // set pointer to node for this bucket buckets.set(bucketId, nodePointerToUse); // save data for this node counts.set(nodePointerToUse, count); // used for doing value comparisons on hash collisions groupIds.set(nodePointerToUse, groupId); // we only ever store ints as values; we need long as an index int currentHead = (int) headPointers.get(groupId); // maintain linked list of nodes in this group (insert at head) headPointers.set(groupId, nodePointerToUse); nextPointers.set(nodePointerToUse, currentHead); } }
/**
 * Assigns the next group id to the value stored at {@code hashPosition} and records both
 * the forward (hash position -> value, hash position -> group id) and reverse
 * (group id -> value) mappings, rehashing when the fill threshold is reached.
 *
 * @return the newly assigned group id
 */
private int addNewGroup(long hashPosition, long value)
{
    // record group id in hash
    int groupId = nextGroupId++;
    values.set(hashPosition, value);
    valuesByGroupId.set(groupId, value);
    groupIds.set(hashPosition, groupId);

    // increase capacity, if necessary
    if (needRehash()) {
        tryRehash();
    }
    return groupId;
}
private int addNewGroup(long hashPosition, long value) { // record group id in hash int groupId = nextGroupId++; values.set(hashPosition, value); valuesByGroupId.set(groupId, value); groupIds.set(hashPosition, groupId); // increase capacity, if necessary if (needRehash()) { tryRehash(); } return groupId; }
/**
 * Appends the non-null variable-width value at {@code block[position]} to
 * {@code elementBlock} and indexes it in {@code blockPositionByHash} under the
 * caller-computed {@code hashPosition}. Grows the hash index when the element
 * block reaches {@code maxFill}.
 *
 * @return the position of the newly appended element within {@code elementBlock}
 * @throws IllegalArgumentException if the value at {@code position} is null
 */
private int addNewElement(long hashPosition, Block block, int position)
{
    checkArgument(!block.isNull(position), "position is null");
    // copy the raw bytes of the value into the element block as a new entry
    block.writeBytesTo(position, 0, block.getSliceLength(position), elementBlock);
    elementBlock.closeEntry();
    int newElementPositionInBlock = elementBlock.getPositionCount() - 1;
    blockPositionByHash.set(hashPosition, newElementPositionInBlock);

    // increase capacity, if necessary
    if (elementBlock.getPositionCount() >= maxFill) {
        rehash(maxFill * 2);
    }
    return newElementPositionInBlock;
}
private int addNewElement(long hashPosition, Block block, int position) { checkArgument(!block.isNull(position), "position is null"); block.writeBytesTo(position, 0, block.getSliceLength(position), elementBlock); elementBlock.closeEntry(); int newElementPositionInBlock = elementBlock.getPositionCount() - 1; blockPositionByHash.set(hashPosition, newElementPositionInBlock); // increase capacity, if necessary if (elementBlock.getPositionCount() >= maxFill) { rehash(maxFill * 2); } return newElementPositionInBlock; }
/**
 * Appends the value at {@code block[position]} to the {@code values} block, records its
 * position in the hash table at {@code hashPosition} along with its {@code count}, and
 * rehashes when the fill threshold is reached.
 */
private void addNewGroup(int hashPosition, int position, Block block, long count)
{
    // the value will be appended at the current end of the values block
    hashPositions.set(hashPosition, values.getPositionCount());
    counts.set(values.getPositionCount(), count);
    type.appendTo(block, position, values);

    // increase capacity, if necessary
    if (values.getPositionCount() >= maxFill) {
        rehash();
    }
}
private void addNewGroup(int hashPosition, int position, Block block, long count) { hashPositions.set(hashPosition, values.getPositionCount()); counts.set(values.getPositionCount(), count); type.appendTo(block, position, values); // increase capacity, if necessary if (values.getPositionCount() >= maxFill) { rehash(); } }
/**
 * Resizes the hash index to hold at least {@code size} elements at the target fill
 * ratio, then reinserts every element of {@code elementBlock} into the new index.
 */
private void rehash(int size)
{
    int newHashSize = arraySize(size + 1, FILL_RATIO);
    hashMask = newHashSize - 1;
    maxFill = calculateMaxFill(newHashSize);
    // reset the index and rebuild it from scratch
    blockPositionByHash.ensureCapacity(newHashSize);
    blockPositionByHash.fill(EMPTY_SLOT);

    // the first element of elementBlock is always null, so reinsertion starts at 1
    for (int blockPosition = 1; blockPosition < elementBlock.getPositionCount(); blockPosition++) {
        blockPositionByHash.set(getHashPositionOfElement(elementBlock, blockPosition), blockPosition);
    }
}
private void rehash(int size) { int newHashSize = arraySize(size + 1, FILL_RATIO); hashMask = newHashSize - 1; maxFill = calculateMaxFill(newHashSize); blockPositionByHash.ensureCapacity(newHashSize); blockPositionByHash.fill(EMPTY_SLOT); // the first element of elementBlock is always null for (int blockPosition = 1; blockPosition < elementBlock.getPositionCount(); blockPosition++) { blockPositionByHash.set(getHashPositionOfElement(elementBlock, blockPosition), blockPosition); } }
/**
 * Writes a block of values through the dictionary: each value is interned via
 * {@code dictionary.putIfAbsent} and its dictionary index recorded in {@code values}.
 * Non-null values also feed the statistics builder and the raw-byte counter.
 * If the writer has already switched to direct encoding, the block is forwarded
 * to the direct writer instead.
 *
 * @throws IllegalStateException if the writer is closed
 * @throws IllegalArgumentException if the block is empty
 */
@Override
public void writeBlock(Block block)
{
    checkState(!closed);
    checkArgument(block.getPositionCount() > 0, "Block is empty");

    // once converted to direct encoding, the dictionary path is bypassed entirely
    if (directEncoded) {
        directColumnWriter.writeBlock(block);
        return;
    }

    // record values
    values.ensureCapacity(rowGroupValueCount + block.getPositionCount());
    for (int position = 0; position < block.getPositionCount(); position++) {
        int index = dictionary.putIfAbsent(block, position);
        values.set(rowGroupValueCount, index);
        rowGroupValueCount++;
        totalValueCount++;

        if (!block.isNull(position)) {
            // todo min/max statistics only need to be updated if value was not already in the dictionary, but non-null count does
            statisticsBuilder.addValue(type.getSlice(block, position));
            rawBytes += block.getSliceLength(position);
            totalNonNullValueCount++;
        }
    }
}
@Override public void writeBlock(Block block) { checkState(!closed); checkArgument(block.getPositionCount() > 0, "Block is empty"); if (directEncoded) { directColumnWriter.writeBlock(block); return; } // record values values.ensureCapacity(rowGroupValueCount + block.getPositionCount()); for (int position = 0; position < block.getPositionCount(); position++) { int index = dictionary.putIfAbsent(block, position); values.set(rowGroupValueCount, index); rowGroupValueCount++; totalValueCount++; if (!block.isNull(position)) { // todo min/max statistics only need to be updated if value was not already in the dictionary, but non-null count does statisticsBuilder.addValue(type.getSlice(block, position)); rawBytes += block.getSliceLength(position); totalNonNullValueCount++; } } }
/**
 * Returns an iterator over all group ids ordered by their raw hash value
 * (as reported by {@code groupByHash.getRawHash}).
 *
 * @return an iterator yielding every group id exactly once, in raw-hash order
 */
private IntIterator hashSortedGroupIds()
{
    // build the identity permutation [0, groupCount), then sort it by raw hash
    IntBigArray groupIds = new IntBigArray();
    groupIds.ensureCapacity(groupByHash.getGroupCount());
    for (int i = 0; i < groupByHash.getGroupCount(); i++) {
        groupIds.set(i, i);
    }

    groupIds.sort(0, groupByHash.getGroupCount(), (leftGroupId, rightGroupId) ->
            Long.compare(groupByHash.getRawHash(leftGroupId), groupByHash.getRawHash(rightGroupId)));

    return new AbstractIntIterator()
    {
        // group count is captured once; the hash is not expected to grow during iteration
        private final int totalPositions = groupByHash.getGroupCount();
        private int position;

        @Override
        public boolean hasNext()
        {
            return position < totalPositions;
        }

        @Override
        public int nextInt()
        {
            return groupIds.get(position++);
        }
    };
}
/**
 * Produces an iterator over every group id, sorted by the group's raw hash value.
 *
 * @return an iterator that yields each group id once, ordered by raw hash
 */
private IntIterator hashSortedGroupIds()
{
    int groupCount = groupByHash.getGroupCount();

    // start from the identity permutation [0, groupCount)
    IntBigArray sortedGroupIds = new IntBigArray();
    sortedGroupIds.ensureCapacity(groupCount);
    for (int groupId = 0; groupId < groupCount; groupId++) {
        sortedGroupIds.set(groupId, groupId);
    }

    // order group ids by their raw hash
    sortedGroupIds.sort(0, groupCount, (left, right) ->
            Long.compare(groupByHash.getRawHash(left), groupByHash.getRawHash(right)));

    return new AbstractIntIterator()
    {
        private int nextIndex;

        @Override
        public boolean hasNext()
        {
            return nextIndex < groupCount;
        }

        @Override
        public int nextInt()
        {
            return sortedGroupIds.get(nextIndex++);
        }
    };
}
// record the mapping: bucket -> node i in the rebuilt bucket array
// NOTE(review): fragment of a rehash loop — bucketId and i come from the enclosing loop
newBuckets.set(bucketId, i);
// store node i into the empty bucket found by the probe above
// NOTE(review): fragment of a rehash loop — bucketId and i come from the enclosing loop
newBuckets.set(bucketId, i);
private void rehash() { long newCapacityLong = hashCapacity * 2L; if (newCapacityLong > Integer.MAX_VALUE) { throw new PrestoException(GENERIC_INSUFFICIENT_RESOURCES, "Size of hash table cannot exceed 1 billion entries"); } int newCapacity = (int) newCapacityLong; int newMask = newCapacity - 1; IntBigArray newHashPositions = new IntBigArray(-1); newHashPositions.ensureCapacity(newCapacity); for (int i = 0; i < values.getPositionCount(); i++) { // find an empty slot for the address int hashPosition = getBucketId(TypeUtils.hashPosition(type, values, i), newMask); while (newHashPositions.get(hashPosition) != -1) { hashPosition = (hashPosition + 1) & newMask; } // record the mapping newHashPositions.set(hashPosition, i); } hashCapacity = newCapacity; mask = newMask; maxFill = calculateMaxFill(newCapacity); hashPositions = newHashPositions; this.counts.ensureCapacity(maxFill); }
private void rehash() { long newCapacityLong = hashCapacity * 2L; if (newCapacityLong > Integer.MAX_VALUE) { throw new PrestoException(GENERIC_INSUFFICIENT_RESOURCES, "Size of hash table cannot exceed 1 billion entries"); } int newCapacity = (int) newCapacityLong; int newMask = newCapacity - 1; IntBigArray newHashPositions = new IntBigArray(-1); newHashPositions.ensureCapacity(newCapacity); for (int i = 0; i < values.getPositionCount(); i++) { // find an empty slot for the address int hashPosition = getBucketId(TypeUtils.hashPosition(type, values, i), newMask); while (newHashPositions.get(hashPosition) != -1) { hashPosition = (hashPosition + 1) & newMask; } // record the mapping newHashPositions.set(hashPosition, i); } hashCapacity = newCapacity; mask = newMask; maxFill = calculateMaxFill(newCapacity); hashPositions = newHashPositions; this.counts.ensureCapacity(maxFill); }
/**
 * Doubles the bucket array and reinserts every allocated node, resolving collisions
 * with the same probe sequence used on insert ({@code nextProbe}/{@code nextBucketId}).
 * Updates {@code buckets}, {@code bucketCount}, {@code maxFill} and {@code mask}, and
 * resizes the per-node arrays to match.
 *
 * @throws PrestoException with {@code GENERIC_INSUFFICIENT_RESOURCES} when doubling
 *         the bucket count would overflow an int
 */
private void rehash()
{
    long newBucketCountLong = bucketCount * 2L;
    if (newBucketCountLong > Integer.MAX_VALUE) {
        throw new PrestoException(GENERIC_INSUFFICIENT_RESOURCES, "Size of hash table cannot exceed " + Integer.MAX_VALUE + " entries (" + newBucketCountLong + ")");
    }
    int newBucketCount = computeBucketCount((int) newBucketCountLong, MAX_FILL_RATIO);
    int newMask = newBucketCount - 1;
    IntBigArray newBuckets = new IntBigArray(-1);
    newBuckets.ensureCapacity(newBucketCount);

    // reinsert every node allocated so far
    for (int i = 0; i < nextNodePointer; i++) {
        // find the old one: the bucket this node hashes to under the new mask
        int bucketId = getBucketIdForNode(i, newMask);
        int probeCount = 1;
        int originalBucket = bucketId;
        // find new one: probe until an empty bucket turns up
        while (newBuckets.get(bucketId) != -1) {
            int probe = nextProbe(probeCount);
            bucketId = nextBucketId(originalBucket, newMask, probe);
            probeCount++;
        }
        // record the mapping
        newBuckets.set(bucketId, i);
    }
    buckets = newBuckets;
    bucketCount = newBucketCount;
    maxFill = calculateMaxFill(newBucketCount, MAX_FILL_RATIO);
    mask = newMask;
    // per-node arrays are sized relative to the bucket array
    resizeNodeArrays(newBucketCount);
}