Skip to content

Usage

Usage#

This page demonstrates how to use the experimental ArraySpec and GroupSpec models for Zarr V2 and V3.

Creating an ArraySpec#

The ArraySpec model represents Zarr array metadata.

from pydantic_zarr.experimental.v2 import ArraySpec

# Describe a 1000 x 1000 uint8 array that is stored in 100 x 100 chunks
array_fields = {
    'shape': (1000, 1000),
    'dtype': 'uint8',
    'chunks': (100, 100),
    'attributes': dict(description='my array', units='meters'),
}
array = ArraySpec(**array_fields)

# Serialize the specification to an indented JSON document and display it
spec_json = array.model_dump_json(indent=2)
print(spec_json)
"""
{
  "zarr_format": 2,
  "attributes": {
    "description": "my array",
    "units": "meters"
  },
  "shape": [
    1000,
    1000
  ],
  "chunks": [
    100,
    100
  ],
  "dtype": "|u1",
  "fill_value": 0,
  "order": "C",
  "filters": null,
  "dimension_separator": "/",
  "compressor": null
}
"""
from pydantic_zarr.experimental.v3 import ArraySpec

# In Zarr V3 the chunk layout, the chunk key encoding and each codec are
# described by dictionaries that carry a 'name' entry.
regular_chunks = dict(name='regular', configuration=dict(chunk_shape=(100, 100)))
slash_separated_keys = dict(name='default', configuration=dict(separator='/'))

# Describe a 1000 x 1000 uint8 array using those pieces
array = ArraySpec(
    shape=(1000, 1000),
    data_type='uint8',
    chunk_grid=regular_chunks,
    chunk_key_encoding=slash_separated_keys,
    codecs=[dict(name='bytes')],
    fill_value=0,
    attributes=dict(description='my array', units='meters'),
)

# Serialize the specification to an indented JSON document and display it
spec_json = array.model_dump_json(indent=2)
print(spec_json)
"""
{
  "zarr_format": 3,
  "node_type": "array",
  "attributes": {
    "description": "my array",
    "units": "meters"
  },
  "shape": [
    1000,
    1000
  ],
  "data_type": "uint8",
  "chunk_grid": {
    "name": "regular",
    "configuration": {
      "chunk_shape": [
        100,
        100
      ]
    }
  },
  "chunk_key_encoding": {
    "name": "default",
    "configuration": {
      "separator": "/"
    }
  },
  "fill_value": 0,
  "codecs": [
    {
      "name": "bytes"
    }
  ],
  "storage_transformers": [],
  "dimension_names": null
}
"""

Creating a Group Specification#

The GroupSpec model represents a Zarr group that can contain arrays and other groups as members.

from pydantic_zarr.experimental.v2 import ArraySpec, GroupSpec

# Specify the two arrays that will become members of the group
data_array = ArraySpec(
    shape=(1000, 1000),
    dtype='float32',
    chunks=(100, 100),
    attributes=dict(description='image data'),
)
metadata_array = ArraySpec(
    shape=(1000,),
    dtype='uint32',
    chunks=(100,),
    attributes=dict(description='pixel metadata'),
)

# Assemble the group: each member name maps to one array specification
group_attributes = dict(name='experiment_001', date='2024-11-23', version=1)
group_members = dict(image=data_array, metadata=metadata_array)
group = GroupSpec(attributes=group_attributes, members=group_members)
from pydantic_zarr.experimental.v3 import ArraySpec, GroupSpec

# Specify the two arrays that will become members of the group. Each one
# spells out its own chunk grid, chunk key encoding and codec pipeline.
data_array = ArraySpec(
    shape=(1000, 1000),
    data_type='float32',
    chunk_grid=dict(name='regular', configuration=dict(chunk_shape=(100, 100))),
    chunk_key_encoding=dict(name='default', configuration=dict(separator='/')),
    codecs=[dict(name='bytes')],
    fill_value=0,
    attributes=dict(description='image data'),
)
metadata_array = ArraySpec(
    shape=(1000,),
    data_type='uint32',
    chunk_grid=dict(name='regular', configuration=dict(chunk_shape=(100,))),
    chunk_key_encoding=dict(name='default', configuration=dict(separator='/')),
    codecs=[dict(name='bytes')],
    fill_value=0,
    attributes=dict(description='pixel metadata'),
)

# Assemble the group: each member name maps to one array specification
group_attributes = dict(name='experiment_001', date='2024-11-23', version=1)
group_members = dict(image=data_array, metadata=metadata_array)
group = GroupSpec(attributes=group_attributes, members=group_members)

Nested Groups#

You can create hierarchical structures by nesting groups within groups.

from pydantic_zarr.experimental.v2 import ArraySpec, GroupSpec

# Leaf nodes: two 512 x 512 arrays in 64 x 64 chunks, one uint8 and one float32
raw_data = ArraySpec(shape=(512, 512), dtype='uint8', chunks=(64, 64), attributes={})
processed_data = ArraySpec(shape=(512, 512), dtype='float32', chunks=(64, 64), attributes={})

# Middle level: one group per processing level, each holding a single array
raw_group = GroupSpec(
    attributes=dict(processing_level='raw'),
    members=dict(data=raw_data),
)
processed_group = GroupSpec(
    attributes=dict(processing_level='processed'),
    members=dict(data=processed_data),
)

# Top level: the root group whose members are the two sub-groups
root = GroupSpec(
    attributes=dict(project='imaging_study'),
    members=dict(raw=raw_group, processed=processed_group),
)
from pydantic_zarr.experimental.v3 import ArraySpec, GroupSpec

# Leaf nodes: two 512 x 512 arrays in 64 x 64 chunks, one uint8 and one float32
raw_data = ArraySpec(
    shape=(512, 512),
    data_type='uint8',
    chunk_grid=dict(name='regular', configuration=dict(chunk_shape=(64, 64))),
    chunk_key_encoding=dict(name='default', configuration=dict(separator='/')),
    codecs=[dict(name='bytes')],
    fill_value=0,
    attributes={},
)
processed_data = ArraySpec(
    shape=(512, 512),
    data_type='float32',
    chunk_grid=dict(name='regular', configuration=dict(chunk_shape=(64, 64))),
    chunk_key_encoding=dict(name='default', configuration=dict(separator='/')),
    codecs=[dict(name='bytes')],
    fill_value=0,
    attributes={},
)

# Middle level: one group per processing level, each holding a single array
raw_group = GroupSpec(
    attributes=dict(processing_level='raw'),
    members=dict(data=raw_data),
)
processed_group = GroupSpec(
    attributes=dict(processing_level='processed'),
    members=dict(data=processed_data),
)

# Top level: the root group whose members are the two sub-groups
root = GroupSpec(
    attributes=dict(project='imaging_study'),
    members=dict(raw=raw_group, processed=processed_group),
)

Working with Flattened Hierarchies#

The to_flat() method converts a hierarchical group structure into a flat dictionary whose keys are the paths of the nodes in the hierarchy. In this flat form, instances of GroupSpec are converted to instances of BaseGroupSpec, which models a Zarr group without any members. A different type is used because, in the flat representation, the hierarchy structure is fully encoded by the keys of the dictionary.

from pydantic_zarr.experimental.v2 import ArraySpec, GroupSpec

# Build a three-node hierarchy: root group -> 'sub' group -> 'data' array
array = ArraySpec(shape=(100,), dtype='float32', chunks=(10,), attributes={})
subgroup = GroupSpec(attributes=dict(level=1), members=dict(data=array))
root = GroupSpec(attributes=dict(level=0), members=dict(sub=subgroup))

# Flatten the hierarchy into a single dict and display it
flat = root.to_flat()
print(flat)
"""
{
    '': BaseGroupSpec(zarr_format=2, attributes={'level': 0}),
    '/sub': BaseGroupSpec(zarr_format=2, attributes={'level': 1}),
    '/sub/data': ArraySpec(
        zarr_format=2,
        attributes={},
        shape=(100,),
        chunks=(10,),
        dtype='<f4',
        fill_value=0,
        order='C',
        filters=None,
        dimension_separator='/',
        compressor=None,
    ),
}
"""
from pydantic_zarr.experimental.v3 import ArraySpec, GroupSpec

# Build a three-node hierarchy: root group -> 'sub' group -> 'data' array
array = ArraySpec(
    shape=(100,),
    data_type='float32',
    chunk_grid=dict(name='regular', configuration=dict(chunk_shape=(10,))),
    chunk_key_encoding=dict(name='default', configuration=dict(separator='/')),
    codecs=[dict(name='bytes')],
    fill_value=0,
    attributes={},
)
subgroup = GroupSpec(attributes=dict(level=1), members=dict(data=array))
root = GroupSpec(attributes=dict(level=0), members=dict(sub=subgroup))

# Flatten the hierarchy into a single dict and display it
flat = root.to_flat()
print(flat)
"""
{
    '': BaseGroupSpec(zarr_format=3, attributes={'level': 0}),
    '/sub': BaseGroupSpec(zarr_format=3, attributes={'level': 1}),
    '/sub/data': ArraySpec(
        zarr_format=3,
        node_type='array',
        attributes={},
        shape=(100,),
        data_type='float32',
        chunk_grid={'name': 'regular', 'configuration': {'chunk_shape': (10,)}},
        chunk_key_encoding={'name': 'default', 'configuration': {'separator': '/'}},
        fill_value=0,
        codecs=({'name': 'bytes'},),
        storage_transformers=(),
        dimension_names=None,
    ),
}
"""

Comparing Arrays and Groups#

Use the like() method to check whether two ArraySpec or GroupSpec instances are structurally equivalent. Pass the exclude argument to leave specific fields, such as attributes, out of the comparison.

from pydantic_zarr.experimental.v2 import ArraySpec, GroupSpec

# Two arrays that share every field except their attributes
shared_fields = dict(shape=(100, 100), dtype='uint8', chunks=(10, 10))
array1 = ArraySpec(**shared_fields, attributes=dict(name='array1'))
array2 = ArraySpec(**shared_fields, attributes=dict(name='array2'))

# The attributes differ, so a full comparison reports a mismatch
print(array1.like(array2))
#> False

# Leaving the attributes out of the comparison makes the arrays match
print(array1.like(array2, exclude={'attributes'}))
#> True

# Two groups with the same member but different attributes
group1 = GroupSpec(attributes=dict(version=1), members=dict(data=array1))
group2 = GroupSpec(attributes=dict(version=2), members=dict(data=array1))

# The attributes differ, so a full comparison reports a mismatch
print(group1.like(group2))
#> False

# Leaving the attributes out of the comparison makes the groups match
print(group1.like(group2, exclude={'attributes'}))
#> True
from pydantic_zarr.experimental.v3 import ArraySpec, GroupSpec

# Two arrays that share every field except their attributes
shared_fields = dict(
    shape=(100, 100),
    data_type='uint8',
    chunk_grid=dict(name='regular', configuration=dict(chunk_shape=(10, 10))),
    chunk_key_encoding=dict(name='default', configuration=dict(separator='/')),
    codecs=[dict(name='bytes')],
    fill_value=0,
)
array1 = ArraySpec(**shared_fields, attributes=dict(name='array1'))
array2 = ArraySpec(**shared_fields, attributes=dict(name='array2'))

# The attributes differ, so a full comparison reports a mismatch
print(array1.like(array2))
#> False

# Leaving the attributes out of the comparison makes the arrays match
print(array1.like(array2, exclude={'attributes'}))
#> True

# Two groups with the same member but different attributes
group1 = GroupSpec(attributes=dict(version=1), members=dict(data=array1))
group2 = GroupSpec(attributes=dict(version=2), members=dict(data=array1))

# The attributes differ, so a full comparison reports a mismatch
print(group1.like(group2))
#> False

# Leaving the attributes out of the comparison makes the groups match
print(group1.like(group2, exclude={'attributes'}))
#> True

Type-safe Group Members with TypedDict#

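Define strict schemas for group members by annotating the members field of a GroupSpec subclass with a TypedDict. The members are then validated at runtime, and creating the group without a required member raises a validation error.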

from typing_extensions import TypedDict
from pydantic_zarr.experimental.v2 import ArraySpec, GroupSpec

# Schema for the group's members: exactly these two named arrays
class TimeseriesMembers(TypedDict):
    timestamps: ArraySpec
    values: ArraySpec

# A GroupSpec subclass whose members must satisfy the schema above
class TimeseriesGroup(GroupSpec):
    members: TimeseriesMembers

# The two arrays that make up a time series
timestamps = ArraySpec(
    shape=(10000,),
    dtype='float64',
    chunks=(1000,),
    attributes=dict(units='seconds since epoch'),
)
values = ArraySpec(
    shape=(10000,),
    dtype='float32',
    chunks=(1000,),
    attributes=dict(units='meters'),
)

# Valid: both required members are supplied
ts_group = TimeseriesGroup(
    attributes=dict(sensor='accelerometer'),
    members=dict(timestamps=timestamps, values=values),
)

# Invalid: 'values' is left out, so validation raises and the error is printed
try:
    ts_group = TimeseriesGroup(
        attributes=dict(sensor='accelerometer'),
        members=dict(timestamps=timestamps),
    )
except ValueError as err:
    print(err)
    """
    1 validation error for TimeseriesGroup
    members.values
      Field required [type=missing, input_value={'timestamps': ArraySpec(...r='/', compressor=None)}, input_type=dict]
        For further information visit https://errors.pydantic.dev/2.12/v/missing
    """
from typing_extensions import TypedDict
from pydantic_zarr.experimental.v3 import ArraySpec, GroupSpec

# Schema for the group's members: exactly these two named arrays
class TimeseriesMembers(TypedDict):
    timestamps: ArraySpec
    values: ArraySpec

# A GroupSpec subclass whose members must satisfy the schema above
class TimeseriesGroup(GroupSpec):
    members: TimeseriesMembers

# The two arrays that make up a time series
timestamps = ArraySpec(
    shape=(10000,),
    data_type='float64',
    chunk_grid=dict(name='regular', configuration=dict(chunk_shape=(1000,))),
    chunk_key_encoding=dict(name='default', configuration=dict(separator='/')),
    codecs=[dict(name='bytes')],
    fill_value=0,
    attributes=dict(units='seconds since epoch'),
)
values = ArraySpec(
    shape=(10000,),
    data_type='float32',
    chunk_grid=dict(name='regular', configuration=dict(chunk_shape=(1000,))),
    chunk_key_encoding=dict(name='default', configuration=dict(separator='/')),
    codecs=[dict(name='bytes')],
    fill_value=0,
    attributes=dict(units='meters'),
)

# Valid: both required members are supplied
ts_group = TimeseriesGroup(
    attributes=dict(sensor='accelerometer'),
    members=dict(timestamps=timestamps, values=values),
)

# Invalid: 'values' is left out, so validation raises and the error is printed
try:
    ts_group = TimeseriesGroup(
        attributes=dict(sensor='accelerometer'),
        members=dict(timestamps=timestamps),
    )
except ValueError as err:
    print(err)
    """
    1 validation error for TimeseriesGroup
    members.values
      Field required [type=missing, input_value={'timestamps': ArraySpec(..., dimension_names=None)}, input_type=dict]
        For further information visit https://errors.pydantic.dev/2.12/v/missing
    """