@Override
public ReadContext init(InitContext context) {
  MessageType requestedProjection =
      getSchemaForRead(context.getFileSchema(), getPartialReadSchema(context));
  return new ReadContext(requestedProjection);
}
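// For context, a ReadSupport implementation like the one above is normally
// handed to a reader builder. A minimal usage sketch, assuming a hypothetical
// MyReadSupport/MyRecord pair and an invented file path (read() throws
// IOException; error handling omitted for brevity):
ParquetReader<MyRecord> reader =
    ParquetReader.builder(new MyReadSupport(), new Path("/tmp/data.parquet")).build();
MyRecord record;
while ((record = reader.read()) != null) {
  // process one materialized record
}
reader.close();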
/**
 * Creates the Parquet-side ReadContext with the requested schema during the init phase.
 *
 * @param context the Parquet InitContext
 * @return the Parquet ReadContext
 */
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(InitContext context) {
  Configuration configuration = context.getConfiguration();
  MessageType fileSchema = context.getFileSchema();
  String columnNames = configuration.get(IOConstants.COLUMNS);
  Map<String, String> contextMetadata = new HashMap<String, String>();
  boolean indexAccess = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false);

  if (columnNames != null) {
    List<String> columnNamesList = getColumnNames(columnNames);
    String columnTypes = configuration.get(IOConstants.COLUMNS_TYPES);
    List<TypeInfo> columnTypesList = getColumnTypes(columnTypes);

    MessageType tableSchema =
        getRequestedSchemaForIndexAccess(indexAccess, columnNamesList, columnTypesList, fileSchema);
    contextMetadata.put(HIVE_TABLE_AS_PARQUET_SCHEMA, tableSchema.toString());
    contextMetadata.put(PARQUET_COLUMN_INDEX_ACCESS, String.valueOf(indexAccess));
    this.hiveTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNamesList, columnTypesList);

    return new ReadContext(
        getRequestedPrunedSchema(columnNamesList, tableSchema, configuration), contextMetadata);
  } else {
    contextMetadata.put(HIVE_TABLE_AS_PARQUET_SCHEMA, fileSchema.toString());
    return new ReadContext(fileSchema, contextMetadata);
  }
}
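// For illustration, a hedged sketch of the job configuration this init(...)
// consumes. The literal keys "columns" and "columns.types" mirror Hive's
// IOConstants.COLUMNS and IOConstants.COLUMNS_TYPES; the column names and
// types below are invented for the example.
Configuration conf = new Configuration();
conf.set("columns", "id,name");
conf.set("columns.types", "bigint,string");
// With these set, init(...) takes the pruning branch and requests only the
// named columns; without them it falls back to the full file schema.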
MessageType fullSchema = context.getFileSchema();
@Override
public ReadContext init(final InitContext context) {
  return new ReadContext(context.getFileSchema());
}
@Override
public ReadContext init(InitContext context) {
  String requestedProjectionString = context.getConfiguration().get(PB_REQUESTED_PROJECTION);
  if (requestedProjectionString != null && !requestedProjectionString.trim().isEmpty()) {
    MessageType requestedProjection =
        getSchemaForRead(context.getFileSchema(), requestedProjectionString);
    LOG.debug("Reading data with projection {}", requestedProjection);
    return new ReadContext(requestedProjection);
  } else {
    MessageType fileSchema = context.getFileSchema();
    LOG.debug("Reading data with schema {}", fileSchema);
    return new ReadContext(fileSchema);
  }
}
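// A hedged usage sketch: the projection arrives as a Parquet message-type
// string in the job configuration. The setter below is parquet-protobuf's
// ProtoParquetInputFormat.setRequestedProjection(Job, String); the projected
// schema string itself is invented for the example.
Job job = Job.getInstance(new Configuration());
ProtoParquetInputFormat.setRequestedProjection(job,
    "message Document { required int64 docId; }");
// init(...) then takes the first branch and reads only the docId column.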
/**
 * Called on the executor side before {@link #prepareForRead(Configuration, Map, MessageType, ReadContext)}
 * and before instantiating the actual Parquet record readers.
 * Responsible for figuring out the Parquet requested schema used for column pruning.
 */
@Override
public ReadContext init(InitContext context) {
  MessageType requestedSchema = clipParquetSchema(context.getFileSchema().asGroupType(), fieldNames);
  return new ReadContext(requestedSchema, new HashMap<String, String>());
}
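// The real clipParquetSchema(...) is not shown here, so the following is a
// simplified sketch of the clipping idea (top-level fields only, nested
// projection omitted), built on org.apache.parquet.schema.GroupType,
// MessageType and Type:
private static MessageType clipTopLevelFields(GroupType fileSchema, String[] fieldNames) {
  List<Type> clippedFields = new ArrayList<>();
  for (String fieldName : fieldNames) {
    // keep only requested fields that actually exist in the file schema
    if (fileSchema.containsField(fieldName)) {
      clippedFields.add(fileSchema.getType(fieldName));
    }
  }
  return new MessageType(fileSchema.getName(), clippedFields);
}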
/**
 * Called in {@link org.apache.hadoop.mapreduce.InputFormat#getSplits(org.apache.hadoop.mapreduce.JobContext)}
 * in the front end.
 *
 * @param context the initialisation context
 * @return the readContext that defines how to read the file
 */
public ReadContext init(InitContext context) {
  return init(context.getConfiguration(), context.getMergedKeyValueMetaData(), context.getFileSchema());
}
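// The one-argument init above delegates to the older three-argument overload,
// which legacy ReadSupport subclasses still override. A hedged sketch of such
// a subclass (the class name is invented; GroupRecordConverter is the Group
// materializer from Parquet's example module):
public class PassThroughReadSupport extends ReadSupport<Group> {
  @Override
  public ReadContext init(Configuration configuration,
      Map<String, String> keyValueMetaData, MessageType fileSchema) {
    // no projection: request the full file schema
    return new ReadContext(fileSchema);
  }

  @Override
  public RecordMaterializer<Group> prepareForRead(Configuration configuration,
      Map<String, String> keyValueMetaData, MessageType fileSchema, ReadContext readContext) {
    return new GroupRecordConverter(readContext.getRequestedSchema());
  }
}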
@Override
public ReadContext init( InitContext context ) {
  String schemaStr = context.getConfiguration().get( ParquetConverter.PARQUET_SCHEMA_CONF_KEY );
  if ( schemaStr == null ) {
    throw new RuntimeException( "Schema not defined in the PentahoParquetSchema key" );
  }
  ParquetInputFieldList schema = ParquetInputFieldList.unmarshall( schemaStr );
  converter = new ParquetConverter( schema.getFields() );

  // get all fields from file's schema
  MessageType fileSchema = context.getFileSchema();
  List<Type> newFields = new ArrayList<>();

  // use only required fields
  for ( IParquetInputField f : schema ) {
    Type origField = fileSchema.getFields().get( fileSchema.getFieldIndex( f.getFormatFieldName() ) );
    newFields.add( origField );
  }
  if ( newFields.isEmpty() ) {
    throw new RuntimeException( "Fields should be declared" );
  }
  MessageType newSchema = new MessageType( fileSchema.getName(), newFields );
  return new ReadContext( newSchema, new HashMap<>() );
}
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(InitContext context) {
  final Configuration configuration = context.getConfiguration();
  final MessageType fileMessageType = context.getFileSchema();
  MessageType requestedProjection = fileMessageType;
  String partialSchemaString = configuration.get(ReadSupport.PARQUET_READ_SCHEMA);

  FieldProjectionFilter projectionFilter = getFieldProjectionFilter(configuration);

  if (partialSchemaString != null && projectionFilter != null) {
    throw new ThriftProjectionException(
        String.format("You cannot provide both a partial schema and a field projection filter. "
            + "Only one of (%s, %s, %s) should be set.",
            PARQUET_READ_SCHEMA, STRICT_THRIFT_COLUMN_FILTER_KEY, THRIFT_COLUMN_FILTER_KEY));
  }

  // set requestedProjection only when it is specified
  if (partialSchemaString != null) {
    requestedProjection = getSchemaForRead(fileMessageType, partialSchemaString);
  } else if (projectionFilter != null) {
    try {
      initThriftClassFromMultipleFiles(context.getKeyValueMetadata(), configuration);
      requestedProjection = getProjectedSchema(projectionFilter);
    } catch (ClassNotFoundException e) {
      throw new ThriftProjectionException("cannot find thriftClass from configuration", e);
    }
  }

  MessageType schemaForRead = getSchemaForRead(fileMessageType, requestedProjection);
  return new ReadContext(schemaForRead);
}
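// A hedged usage sketch: exactly one projection mechanism may be configured.
// ReadSupport.PARQUET_READ_SCHEMA is the standard "parquet.read.schema" key;
// the partial schema string below is invented for the example.
Configuration conf = new Configuration();
conf.set(ReadSupport.PARQUET_READ_SCHEMA, "message Person { required binary name; }");
// Setting a thrift column-filter key in the same configuration as well would
// make init(...) throw the ThriftProjectionException shown above.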