// NOTE(review): fragment cut at both ends — closes an OrcFile.createReader(...) call built
// from readerOptions(conf).filesystem(fs), then wraps the IOException (cause preserved) in
// IllegalArgumentException. A sibling fragment with the identical message wraps in
// RuntimeException instead — confirm which exception type is intended and unify.
OrcFile.readerOptions( conf ).filesystem( fs ) ); } catch ( IOException e ) { throw new IllegalArgumentException( "Unable to read data from file " + fileName, e );
// NOTE(review): fragment — builds ReaderOptions capped at OrcConf.MAX_FILE_LENGTH so the
// reader ignores bytes past the configured logical file length; the enclosing call is
// outside this view.
OrcFile.readerOptions(hadoopConf).maxLength(OrcConf.MAX_FILE_LENGTH.getLong(hadoopConf)));
// NOTE(review): fragment of a larger method — resolves the FileSystem for `input` from the
// Hadoop conf, then opens a Reader with options drawn from `options.getConfiguration()`.
// Presumably `conf` and `options.getConfiguration()` are the same object — verify at the caller.
FileSystem fs = input.getFileSystem(conf); Reader reader = createReader(input, readerOptions(options.getConfiguration()).filesystem(fs));
// NOTE(review): fragment of a reader constructor, cut off mid-block (braces unbalanced here).
// Flow visible in this span: use the FileSystem supplied in options, else resolve it from the
// path; cache conf/maxLength/useUTCTimestamp off the options; then obtain file-tail metadata
// from one of three sources in priority order — cached FileMetadata, a caller-supplied
// OrcTail, or by reading the footer via extractFileTail (result memoized back into options).
// userMetadata is deliberately left null when FileMetadata is used (comment in code says it
// is not cached and not needed on this path).
FileSystem fs = options.getFilesystem(); if (fs == null) { fs = path.getFileSystem(options.getConfiguration()); this.conf = options.getConfiguration(); this.maxLength = options.getMaxLength(); this.useUTCTimestamp = options.getUseUTCTimestamp(); FileMetadata fileMetadata = options.getFileMetadata(); if (fileMetadata != null) { this.compressionKind = fileMetadata.getCompressionKind(); this.userMetadata = null; // not cached and not needed here } else { OrcTail orcTail = options.getOrcTail(); if (orcTail == null) { tail = extractFileTail(fs, path, options.getMaxLength()); options.orcTail(tail); } else { checkOrcVersion(path, orcTail.getPostScript());
// NOTE(review): fragment cut at both ends — same open-reader pattern as the
// IllegalArgumentException variant above but wraps the IOException in RuntimeException.
// Cause is preserved (good); consider converging the two sites on one exception type.
OrcFile.readerOptions( conf ).filesystem( fs ) ); } catch ( IOException e ) { throw new RuntimeException( "Unable to read data from file " + fileName, e );
/**
 * Initializes this record reader for one ORC file split: opens the file,
 * restricts reading to the split's byte range, and records the file schema
 * and the ids of the requested columns.
 *
 * <p>NOTE(review): appears to rely on {@code initBatch()} (called last) to
 * finish setup — confirm against the class contract.
 *
 * @param inputSplit           the split to read; must be a {@link FileSplit}
 * @param taskAttemptContext   task context supplying the Hadoop configuration
 * @throws IOException          if the file or filesystem cannot be opened
 * @throws InterruptedException declared by the RecordReader contract
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException {
  FileSplit split = (FileSplit) inputSplit;
  Configuration hadoopConf = taskAttemptContext.getConfiguration();
  // Cap the readable length at the configured maximum file length.
  long maxFileLength = OrcConf.MAX_FILE_LENGTH.getLong(hadoopConf);
  Reader fileReader = OrcFile.createReader(
      split.getPath(),
      OrcFile.readerOptions(hadoopConf)
          .maxLength(maxFileLength)
          .filesystem(split.getPath().getFileSystem(hadoopConf)));
  // Only the byte range [start, start + length) of this split is read.
  Reader.Options readOptions =
      OrcInputFormat.buildOptions(hadoopConf, fileReader, split.getStart(), split.getLength());
  recordReader = fileReader.rows(readOptions);
  orcSchema = fileReader.getSchema();
  requestedColumnIds =
      OrcUtils.requestedColumnIds(caseSensitive, fieldNames, schemaFieldNames, fileReader);
  initBatch();
}
/**
 * Creates a mapreduce record reader over the ORC file backing the given split.
 * The reader is limited to the configured maximum file length and to the
 * split's byte range.
 *
 * @param inputSplit         the split to read; must be a {@link FileSplit}
 * @param taskAttemptContext task context supplying the Hadoop configuration
 * @return a reader yielding rows of the split as {@code V} values
 * @throws IOException          if the file cannot be opened
 * @throws InterruptedException declared by the InputFormat contract
 */
@Override
public RecordReader<NullWritable, V> createRecordReader(InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  FileSplit fileSplit = (FileSplit) inputSplit;
  Configuration hadoopConf = taskAttemptContext.getConfiguration();
  long maxFileLength = OrcConf.MAX_FILE_LENGTH.getLong(hadoopConf);
  Reader orcReader = OrcFile.createReader(
      fileSplit.getPath(),
      OrcFile.readerOptions(hadoopConf).maxLength(maxFileLength));
  return new OrcMapreduceRecordReader<>(
      orcReader,
      org.apache.orc.mapred.OrcInputFormat.buildOptions(
          hadoopConf, orcReader, fileSplit.getStart(), fileSplit.getLength()));
}
/**
 * Static factory returning a fresh {@link ReaderOptions} bound to the given
 * Hadoop configuration.
 *
 * @param conf the configuration the reader options will consult
 * @return a new, independently mutable options instance
 */
public static ReaderOptions readerOptions(Configuration conf) {
  ReaderOptions options = new ReaderOptions(conf);
  return options;
}
// Decides from the ORC footer alone whether this ACID bucket file has deletes/updates
// and therefore needs compaction. Span also contains the cut-off header of
// getCompactionCommand — left untouched.
private static boolean needsCompaction(FileStatus bucket, FileSystem fs) throws IOException { //create reader, look at footer //no need to check side file since it can only be in a streaming ingest delta Reader orcReader = OrcFile.createReader(bucket.getPath(),OrcFile.readerOptions(fs.getConf()) .filesystem(fs)); AcidStats as = OrcAcidUtils.parseAcidStats(orcReader); if(as == null) { //should never happen since we are reading bucket_x written by acid write throw new IllegalStateException("AcidStats missing in " + bucket.getPath()); } return as.deletes > 0 || as.updates > 0; } private static String getCompactionCommand(Table t, Partition p) {
/**
 * Creates a mapred (old-API) record reader over the ORC file backing the
 * given split, capped at the configured maximum file length and restricted to
 * the split's byte range.
 *
 * @param inputSplit the split to read; must be a {@link FileSplit}
 * @param conf       job configuration used to open and configure the reader
 * @param reporter   progress reporter (unused here)
 * @return a reader yielding rows of the split as {@code V} values
 * @throws IOException if the file cannot be opened
 */
@Override
public RecordReader<NullWritable, V> getRecordReader(InputSplit inputSplit, JobConf conf,
    Reporter reporter) throws IOException {
  FileSplit fileSplit = (FileSplit) inputSplit;
  long maxFileLength = OrcConf.MAX_FILE_LENGTH.getLong(conf);
  Reader orcReader = OrcFile.createReader(
      fileSplit.getPath(),
      OrcFile.readerOptions(conf).maxLength(maxFileLength));
  return new OrcMapredRecordReader<>(
      orcReader,
      buildOptions(conf, orcReader, fileSplit.getStart(), fileSplit.getLength()));
}
protected RecordReader createReader( FileInputSplit fileSplit, TaskAttemptContext taskAttemptContext) throws IOException { // by default, we use org.apache.orc.mapreduce.OrcMapreduceRecordReader Configuration hadoopConf = taskAttemptContext.getConfiguration(); org.apache.hadoop.fs.Path filePath = new org.apache.hadoop.fs.Path(fileSplit.getPath().toUri()); Reader file = OrcFile.createReader(filePath, OrcFile.readerOptions(hadoopConf).maxLength(OrcConf.MAX_FILE_LENGTH.getLong(hadoopConf))); return new OrcMapreduceRecordReader<>(file, org.apache.orc.mapred.OrcInputFormat.buildOptions(hadoopConf, file, fileSplit.getStart(), fileSplit.getLength())); } }
/**
 * Initialize ORC file reader and batch record reader.
 * Please note that `initBatch` is needed to be called after this.
 *
 * @param inputSplit         the split to read; must be a {@link FileSplit}
 * @param taskAttemptContext task context supplying the Hadoop configuration
 * @throws IOException if the file or filesystem cannot be opened
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
    throws IOException {
  FileSplit split = (FileSplit) inputSplit;
  Configuration hadoopConf = taskAttemptContext.getConfiguration();
  // Open the file with an explicit filesystem and the configured length cap.
  Reader fileReader = OrcFile.createReader(
      split.getPath(),
      OrcFile.readerOptions(hadoopConf)
          .maxLength(OrcConf.MAX_FILE_LENGTH.getLong(hadoopConf))
          .filesystem(split.getPath().getFileSystem(hadoopConf)));
  // Restrict row iteration to this split's byte range.
  Reader.Options readOptions =
      OrcInputFormat.buildOptions(hadoopConf, fileReader, split.getStart(), split.getLength());
  recordReader = fileReader.rows(readOptions);
}
/**
 * Initialize ORC file reader and batch record reader.
 * Please note that `initBatch` is needed to be called after this.
 *
 * @param inputSplit         the split to read; must be a {@link FileSplit}
 * @param taskAttemptContext task context supplying the Hadoop configuration
 * @throws IOException if the file or filesystem cannot be opened
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
    throws IOException {
  FileSplit orcSplit = (FileSplit) inputSplit;
  Configuration jobConf = taskAttemptContext.getConfiguration();
  long lengthCap = OrcConf.MAX_FILE_LENGTH.getLong(jobConf);
  // Build the reader against the split's own filesystem, honoring the length cap.
  Reader orcReader = OrcFile.createReader(
      orcSplit.getPath(),
      OrcFile.readerOptions(jobConf)
          .maxLength(lengthCap)
          .filesystem(orcSplit.getPath().getFileSystem(jobConf)));
  Reader.Options rowOptions = OrcInputFormat.buildOptions(
      jobConf, orcReader, orcSplit.getStart(), orcSplit.getLength());
  recordReader = orcReader.rows(rowOptions);
}
/**
 * Sets the filesystem to read from, overriding path-based resolution.
 * Covariant override so fluent chains keep the {@code ReaderOptions} type.
 *
 * @param fs the filesystem to use
 * @return this, for chaining
 */
public ReaderOptions filesystem(FileSystem fs) {
  super.filesystem(fs);
  return this;
}
/**
 * Supplies pre-read file metadata so the reader can skip re-parsing the footer.
 * Covariant override so fluent chains keep the {@code ReaderOptions} type.
 *
 * @param metadata cached metadata for the file
 * @return this, for chaining
 */
public ReaderOptions fileMetadata(FileMetadata metadata) {
  super.fileMetadata(metadata);
  return this;
}
/**
 * Supplies a cached file tail so the reader need not re-read it.
 * Covariant override so fluent chains keep the {@code ReaderOptions} type.
 *
 * @param orcTail the cached tail of the file
 * @return this, for chaining
 */
public ReaderOptions orcTail(OrcTail orcTail) {
  super.orcTail(orcTail);
  return this;
}
}
/**
 * Caps the number of file bytes the reader will consider.
 * Covariant override so fluent chains keep the {@code ReaderOptions} type.
 *
 * @param val maximum logical file length in bytes
 * @return this, for chaining
 */
public ReaderOptions maxLength(long val) {
  super.maxLength(val);
  return this;
}
/**
 * Sets the filesystem to use when opening the file.
 * Covariant override preserving the {@code ReaderOptions} return type.
 *
 * @param fs the filesystem to use
 * @return this, for chaining
 */
public ReaderOptions filesystem(FileSystem fs) {
  super.filesystem(fs);
  return this;
}
/**
 * Supplies a cached file tail so the reader need not re-read it.
 * Covariant override preserving the {@code ReaderOptions} return type.
 *
 * @param orcTail the cached tail of the file
 * @return this, for chaining
 */
public ReaderOptions orcTail(OrcTail orcTail) {
  super.orcTail(orcTail);
  return this;
}
/**
 * Supplies pre-read file metadata so footer parsing can be skipped.
 * Covariant override preserving the {@code ReaderOptions} return type.
 *
 * @param metadata cached metadata for the file
 * @return this, for chaining
 */
public ReaderOptions fileMetadata(FileMetadata metadata) {
  super.fileMetadata(metadata);
  return this;
}