from onprem import LLM
from onprem.pipelines import Extractor
pipelines.extractor.models
EntityCollection
def EntityCollection(
data:Any
)->None:
Collection of entities extracted from a document.
Entity
def Entity(
data:Any
)->None:
A named entity extracted from text.
DocumentMetadata
def DocumentMetadata(
data:Any
)->None:
High-level metadata about a document.
KPPCollection
def KPPCollection(
data:Any
)->None:
Collection of Key Performance Parameters from acquisition documents.
KPPValue
def KPPValue(
data:Any
)->None:
A Key Performance Parameter with threshold and objective values.
ParamCollection
def ParamCollection(
data:Any
)->None:
!!! abstract “Usage Documentation” Models
A base class for creating Pydantic models.
Attributes:
__class_vars__: The names of the class variables defined on the model.
__private_attributes__: Metadata about the private attributes of the model.
__signature__: The synthesized `__init__` [`Signature`][inspect.Signature] of the model.
__pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__: The core schema of the model.
__pydantic_custom_init__: Whether the model has a custom `__init__` function.
__pydantic_decorators__: Metadata containing the decorators defined on the model.
This replaces `Model.__validators__` and `Model.__root_validators__` from Pydantic V1.
__pydantic_generic_metadata__: A dictionary containing metadata about generic Pydantic models.
The `origin` and `args` items map to the [`__origin__`][genericalias.__origin__]
and [`__args__`][genericalias.__args__] attributes of [generic aliases][types-genericalias],
and the `parameter` item maps to the `__parameter__` attribute of generic classes.
__pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__: The name of the post-init method for the model, if defined.
__pydantic_root_model__: Whether the model is a [`RootModel`][pydantic.root_model.RootModel].
__pydantic_serializer__: The `pydantic-core` `SchemaSerializer` used to dump instances of the model.
__pydantic_validator__: The `pydantic-core` `SchemaValidator` used to validate instances of the model.
__pydantic_fields__: A dictionary of field names and their corresponding [`FieldInfo`][pydantic.fields.FieldInfo] objects.
__pydantic_computed_fields__: A dictionary of computed field names and their corresponding [`ComputedFieldInfo`][pydantic.fields.ComputedFieldInfo] objects.
__pydantic_extra__: A dictionary containing extra values, if [`extra`][pydantic.config.ConfigDict.extra]
is set to `'allow'`.
__pydantic_fields_set__: The names of fields explicitly set during instantiation.
__pydantic_private__: Values of private attributes set on the model instance.
SystemParameter
def SystemParameter(
data:Any
)->None:
!!! abstract “Usage Documentation” Models
A base class for creating Pydantic models.
Attributes:
__class_vars__: The names of the class variables defined on the model.
__private_attributes__: Metadata about the private attributes of the model.
__signature__: The synthesized `__init__` [`Signature`][inspect.Signature] of the model.
__pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__: The core schema of the model.
__pydantic_custom_init__: Whether the model has a custom `__init__` function.
__pydantic_decorators__: Metadata containing the decorators defined on the model.
This replaces `Model.__validators__` and `Model.__root_validators__` from Pydantic V1.
__pydantic_generic_metadata__: A dictionary containing metadata about generic Pydantic models.
The `origin` and `args` items map to the [`__origin__`][genericalias.__origin__]
and [`__args__`][genericalias.__args__] attributes of [generic aliases][types-genericalias],
and the `parameter` item maps to the `__parameter__` attribute of generic classes.
__pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__: The name of the post-init method for the model, if defined.
__pydantic_root_model__: Whether the model is a [`RootModel`][pydantic.root_model.RootModel].
__pydantic_serializer__: The `pydantic-core` `SchemaSerializer` used to dump instances of the model.
__pydantic_validator__: The `pydantic-core` `SchemaValidator` used to validate instances of the model.
__pydantic_fields__: A dictionary of field names and their corresponding [`FieldInfo`][pydantic.fields.FieldInfo] objects.
__pydantic_computed_fields__: A dictionary of computed field names and their corresponding [`ComputedFieldInfo`][pydantic.fields.ComputedFieldInfo] objects.
__pydantic_extra__: A dictionary containing extra values, if [`extra`][pydantic.config.ConfigDict.extra]
is set to `'allow'`.
__pydantic_fields_set__: The names of fields explicitly set during instantiation.
__pydantic_private__: Values of private attributes set on the model instance.
# Usage example: extract parameters from a short spec sheet and keep only
# those whose names match an entry in the whitelist.

# Initialize
llm = LLM("anthropic/claude-sonnet-4-5", max_tokens=32000, mute_stream=True)
extractor = Extractor(llm)

# Test document
content = """
Aircraft Specifications:
- Maximum Speed: 500 mph
- Cruise Speed (avg): 350 knots
- Operational Range: 2000 nm
- Payload Capacity: 5000 lbs
- Power Output: 1500 W
- Total Weight: 25000 kg
"""

# Extract with whitelist filtering
whitelist = {'range', 'cruise speed', 'weight', 'power'}
result = extractor.extract_parameters(
    content=content,
    parameter_whitelist=whitelist,
)

# Display results
print(f"Extracted {len(result['params'])} matching parameters:\n")
for p in result['params']:
    # Each entry is indexed by 'name', 'value' and 'unit'.
    print(f" {p['name']}: {p['value']} {p['unit']}")
# cruise speed: 350 knots -> matches "cruise speed"
# range: 2000 nm -> matches "operational range"
# power: 1500 W -> matches "power output"
# weight: 25000 kg -> matches "total weight"
Whitelist filtered params: 6 → 4 items
Extracted 4 matching parameters:
cruise speed: 350 knots
range: 2000 nm
power: 1500 W
weight: 25000 kg