"""Source code for ``voxatlas.config.config``: YAML config loading, validation, and default expansion."""
from copy import deepcopy
import yaml
from .defaults import DEFAULT_CONFIG
from .schema import validate_config
[docs]
def load_config(path: str) -> dict:
    """
    Load a VoxAtlas YAML configuration file.

    Expected YAML Format
    --------------------
    VoxAtlas configuration files are YAML mappings (YAML "dicts") with a small
    set of conventional top-level keys. The minimal valid config contains a
    ``features`` list:

    .. code-block:: yaml

        features:
          - acoustic.pitch.dummy

    Optional keys supported by the pipeline and config layer include:

    - ``pipeline``: pipeline runtime options (mapping)

      - ``n_jobs``: number of worker processes per dependency layer (int)
      - ``cache``: enable/disable on-disk feature caching (bool)
      - ``cache_dir``: cache directory when caching is enabled (str)

    - ``feature_config``: per-feature parameter overrides (mapping)

      - keys are feature names from ``features``
      - values are extractor-specific parameter mappings

    Example with per-feature parameters and pipeline options:

    .. code-block:: yaml

        features:
          - phonology.prosody.stressed
          - acoustic.pitch.f0
        pipeline:
          n_jobs: 4
          cache: true
          cache_dir: .voxatlas_cache
        feature_config:
          phonology.prosody.stressed:
            language: fra
            resource_root: /path/to/resources/phonology

    Parameters
    ----------
    path : str
        Filesystem path to a YAML configuration file.

    Returns
    -------
    dict
        Parsed configuration dictionary.

    Raises
    ------
    OSError
        Raised when the file cannot be opened.
    yaml.YAMLError
        Raised when the YAML document is invalid (including when its bytes
        cannot be decoded).

    Notes
    -----
    This function parses YAML only. It does not apply defaults or schema
    validation. For the recommended entry point that validates and applies
    defaults, see :func:`load_and_prepare_config`.

    The file is read as bytes and decoded by the YAML parser itself (UTF-8
    unless a byte-order mark says otherwise), so the result does not depend on
    the platform's locale encoding.

    The parsed document is returned as-is: an empty YAML file yields ``None``
    and a non-mapping document yields whatever type it parses to.

    Examples
    --------
    >>> import tempfile
    >>> from pathlib import Path
    >>> from voxatlas.config import load_config
    >>> yaml_text = "features:\\n  - acoustic.pitch.dummy\\n"
    >>> with tempfile.TemporaryDirectory() as tmp:
    ...     path = Path(tmp) / "config.yaml"
    ...     _ = path.write_text(yaml_text, encoding="utf-8")
    ...     cfg = load_config(str(path))
    ...     cfg["features"]
    ['acoustic.pitch.dummy']
    """
    # Binary mode on purpose: text mode would decode with the locale's default
    # encoding, which corrupts or rejects non-ASCII UTF-8 configs on some
    # platforms. Handing raw bytes to PyYAML lets it pick the encoding.
    with open(path, "rb") as stream:
        return yaml.safe_load(stream)
[docs]
def expand_defaults(cfg: dict) -> dict:
    """
    Overlay a user configuration on top of the VoxAtlas defaults.

    Merge Semantics
    ---------------
    The result starts as a deep copy of
    :data:`voxatlas.config.defaults.DEFAULT_CONFIG`; the user configuration is
    then applied with ``dict.update``. The merge is therefore **shallow**:

    - Only top-level keys are considered.
    - A top-level key present in ``cfg`` **replaces** the default value for
      that key as a whole.
    - Nested mappings are **not** combined. Supplying a ``pipeline`` mapping
      discards every default ``pipeline`` entry the user did not repeat.

    For instance, with the default:

    .. code-block:: python

        {"features": [], "pipeline": {"cache": True}}

    and the user config:

    .. code-block:: python

        {"pipeline": {"n_jobs": 4}}

    the result is:

    .. code-block:: python

        {"features": [], "pipeline": {"n_jobs": 4}}

    (``pipeline.cache`` is gone because the whole ``pipeline`` mapping was
    replaced).

    Parameters
    ----------
    cfg : dict
        User-supplied configuration dictionary.

    Returns
    -------
    dict
        New dictionary holding the defaults overridden by the top-level keys
        of ``cfg``.

    Notes
    -----
    To change a single pipeline option while keeping the remaining defaults,
    pass the complete desired ``pipeline`` mapping.
    :func:`load_and_prepare_config` applies defaults through this function.

    Examples
    --------
    >>> from voxatlas.config import expand_defaults
    >>> cfg = expand_defaults({"features": ["acoustic.pitch.dummy"]})
    >>> cfg["features"]
    ['acoustic.pitch.dummy']
    >>> sorted(cfg["pipeline"].keys())
    ['cache']
    """
    # Copy first so the module-level defaults are never mutated by the merge.
    merged = deepcopy(DEFAULT_CONFIG)
    merged.update(cfg)
    return merged
[docs]
def load_and_prepare_config(path: str) -> dict:
    """
    Read a VoxAtlas configuration file, validate it, and apply defaults.

    The steps run in this order: parse the YAML file with
    :func:`load_config`, check the parsed result with ``validate_config``,
    then fill in defaults with :func:`expand_defaults`. Validation therefore
    sees the configuration exactly as written by the user, before any
    defaults are added.

    Parameters
    ----------
    path : str
        Filesystem path to a YAML configuration file.

    Returns
    -------
    dict
        Validated configuration with defaults applied.

    Raises
    ------
    ConfigValidationError
        Raised when the configuration does not satisfy the expected schema.
    OSError
        Propagated from :func:`load_config` when the file cannot be opened.
    yaml.YAMLError
        Propagated from :func:`load_config` when the YAML document is invalid.

    Notes
    -----
    This is the recommended configuration entry point for the CLI and tutorial
    workflows.

    Examples
    --------
    >>> import tempfile
    >>> from pathlib import Path
    >>> from voxatlas.config import load_and_prepare_config
    >>> yaml_text = "features:\\n  - acoustic.pitch.dummy\\n"
    >>> with tempfile.TemporaryDirectory() as tmp:
    ...     path = Path(tmp) / "config.yaml"
    ...     _ = path.write_text(yaml_text, encoding="utf-8")
    ...     cfg = load_and_prepare_config(str(path))
    ...     cfg["features"]
    ['acoustic.pitch.dummy']
    """
    user_cfg = load_config(path)
    # Validate the raw user input before defaults are merged in.
    validate_config(user_cfg)
    return expand_defaults(user_cfg)