Skip to content

Sample input

sample_input #

SampleInput #

Bases: pydantic.BaseModel

Configuration model for processing medical imaging samples.

This class provides a standardized configuration for loading and processing medical imaging data, including DICOM crawling and ROI matching settings.

Attributes:

Name Type Description
directory pathlib.Path

Directory containing the input files. Must exist and be readable.

dataset_name str | None

Optional name for the dataset. Defaults to the base name of the input directory.

update_crawl bool

Whether to force a new crawl even if one exists. Default is False.

n_jobs int

Number of jobs to run in parallel. Default is the number of CPU cores minus 2, with a minimum of 1.

modalities list[str] | None

List of modalities to include. None means include all modalities.

roi_matcher imgtools.coretypes.masktypes.ROIMatcher

Configuration for matching regions of interest in the images.

Examples:

>>> from imgtools.io.sample_input import (
...     SampleInput,
... )
>>> config = SampleInput(
...     directory="data/NSCLC-Radiomics"
... )
>>> config.dataset_name
'NSCLC-Radiomics'
>>> # Using the factory method with ROI matching parameters
>>> config = SampleInput.build(
...     directory="data/NSCLC-Radiomics",
...     roi_match_map={
...         "GTV": ["GTV.*"],
...         "PTV": ["PTV.*"],
...     },
...     roi_ignore_case=True,
...     roi_handling_strategy="merge",
... )

Methods:

Name Description
build

Create a SampleInput with separate parameters for ROIMatcher.

default

Create a default SampleInput instance.

query

Query the interlacer for a specific modality.

crawler property #

crawler: imgtools.dicom.crawl.Crawler

Get the Crawler instance, initializing it if needed.

Returns:

Type Description
imgtools.dicom.crawl.Crawler

A DICOM crawler instance, initialized with the current configuration.

Notes

The crawler is lazily initialized on first access.

interlacer property #

interlacer: imgtools.dicom.interlacer.Interlacer

Get the Interlacer instance, initializing it if needed.

Returns:

Type Description
imgtools.dicom.interlacer.Interlacer

An Interlacer instance tied to the current crawler.

Notes

The interlacer is lazily initialized on first access, which may trigger crawler initialization if it hasn't been accessed yet.

build classmethod #

build(
    directory: str | pathlib.Path,
    dataset_name: str | None = None,
    update_crawl: bool = False,
    n_jobs: int | None = None,
    modalities: list[str] | None = None,
    roi_match_map: imgtools.coretypes.masktypes.Valid_Inputs = None,
    roi_ignore_case: bool = True,
    roi_handling_strategy: (
        str | imgtools.coretypes.masktypes.ROIMatchStrategy
    ) = imgtools.coretypes.masktypes.ROIMatchStrategy.MERGE,
    roi_allow_multi_key_matches: bool = True,
    roi_on_missing_regex: (
        str
        | imgtools.coretypes.masktypes.ROIMatchFailurePolicy
    ) = imgtools.coretypes.masktypes.ROIMatchFailurePolicy.IGNORE,
) -> "SampleInput"

Create a SampleInput with separate parameters for ROIMatcher.

This factory method allows users to specify ROIMatcher parameters directly instead of constructing a ROIMatcher object separately.

Parameters:

Name Type Description Default
cls #
class

The SampleInput class

required
directory #
str | pathlib.Path

Directory containing the input files

required
dataset_name #
str | None

Name of the dataset, by default None (uses input directory name)

None
update_crawl #
bool

Whether to force recrawling, by default False

False
n_jobs #
int | None

Number of parallel jobs, by default None (uses CPU count - 2, with a minimum of 1)

None
modalities #
list[str] | None

List of modalities to include, by default None (all)

None
roi_match_map #
imgtools.coretypes.masktypes.Valid_Inputs

ROI matching patterns, by default None

None
roi_ignore_case #
bool

Whether to ignore case in ROI matching, by default True

True
roi_handling_strategy #
str | imgtools.coretypes.masktypes.ROIMatchStrategy

Strategy for handling ROI matches, by default ROIMatchStrategy.MERGE

imgtools.coretypes.masktypes.ROIMatchStrategy.MERGE
roi_allow_multi_key_matches #
bool

Whether to allow one ROI to match multiple keys in the match_map.

True
roi_on_missing_regex #
str | imgtools.coretypes.masktypes.ROIMatchFailurePolicy

How to handle when no ROI matches any pattern in match_map.

imgtools.coretypes.masktypes.ROIMatchFailurePolicy.IGNORE

Returns:

Type Description
imgtools.io.sample_input.SampleInput

Configured SampleInput instance

Source code in src/imgtools/io/sample_input.py
@classmethod
def build(
    cls,
    directory: str | Path,
    dataset_name: str | None = None,
    update_crawl: bool = False,
    n_jobs: int | None = None,
    modalities: list[str] | None = None,
    roi_match_map: ROIMatcherInputs = None,
    roi_ignore_case: bool = True,
    roi_handling_strategy: str | ROIMatchStrategy = ROIMatchStrategy.MERGE,
    roi_allow_multi_key_matches: bool = True,
    roi_on_missing_regex: str | ROIMatchFailurePolicy = (
        ROIMatchFailurePolicy.IGNORE
    ),
) -> "SampleInput":
    """Build a SampleInput from flat ROI-matching keyword arguments.

    The ``roi_*`` arguments are forwarded to ``create_roi_matcher`` so that
    callers do not have to assemble the ROI matcher themselves before
    constructing the instance.

    Parameters
    ----------
    directory : str | Path
        Directory containing the input files; wrapped in ``Path`` before
        being handed to the constructor.
    dataset_name : str | None, optional
        Name of the dataset, by default None.
    update_crawl : bool, optional
        Whether to force recrawling, by default False.
    n_jobs : int | None, optional
        Number of parallel jobs. A falsy value (None or 0) is replaced by
        ``max(1, multiprocessing.cpu_count() - 2)``.
    modalities : list[str] | None, optional
        List of modalities to include, by default None.
    roi_match_map : ROIMatcherInputs, optional
        ROI matching patterns passed as the first argument to
        ``create_roi_matcher``, by default None.
    roi_ignore_case : bool, optional
        Forwarded as ``ignore_case``, by default True.
    roi_handling_strategy : str | ROIMatchStrategy, optional
        Forwarded as ``handling_strategy``. A string is lower-cased and
        passed through ``ROIMatchStrategy(...)`` first. By default
        ``ROIMatchStrategy.MERGE``.
    roi_allow_multi_key_matches : bool, optional
        Forwarded as ``allow_multi_key_matches``, by default True.
    roi_on_missing_regex : str | ROIMatchFailurePolicy, optional
        Forwarded as ``on_missing_regex``. A string is lower-cased and
        passed through ``ROIMatchFailurePolicy(...)`` first. By default
        ``ROIMatchFailurePolicy.IGNORE``.

    Returns
    -------
    SampleInput
        Instance of ``cls`` configured with the resolved settings.
    """
    # Accept plain strings for convenience; normalise case before coercion.
    strategy = (
        ROIMatchStrategy(roi_handling_strategy.lower())
        if isinstance(roi_handling_strategy, str)
        else roi_handling_strategy
    )
    missing_policy = (
        ROIMatchFailurePolicy(roi_on_missing_regex.lower())
        if isinstance(roi_on_missing_regex, str)
        else roi_on_missing_regex
    )

    matcher = create_roi_matcher(
        roi_match_map,
        handling_strategy=strategy,
        ignore_case=roi_ignore_case,
        allow_multi_key_matches=roi_allow_multi_key_matches,
        on_missing_regex=missing_policy,
    )

    # Leave two cores free when no job count is given, but never drop below 1.
    if n_jobs:
        worker_count = n_jobs
    else:
        worker_count = max(1, multiprocessing.cpu_count() - 2)

    settings = {
        "directory": Path(directory),
        "dataset_name": dataset_name,
        "update_crawl": update_crawl,
        "n_jobs": worker_count,
        "modalities": modalities,
        "roi_matcher": matcher,
    }
    return cls(**settings)

default classmethod #

default() -> 'SampleInput'

Create a default SampleInput instance.

Source code in src/imgtools/io/sample_input.py
@classmethod
def default(cls) -> "SampleInput":
    """Return the instance produced by ``cls.build`` for the ``./data`` directory."""
    default_directory = "./data"
    return cls.build(directory=default_directory)

query #

query(
    modalities: str | None = None,
) -> list[list[imgtools.dicom.interlacer.SeriesNode]]

Query the interlacer for a specific modality.

Source code in src/imgtools/io/sample_input.py
def query(self, modalities: str | None = None) -> list[list[SeriesNode]]:
    """Run a modality query against the interlacer.

    When ``modalities`` is None, the query string is built from
    ``self.modalities`` joined with commas, or ``"*"`` if that attribute
    is empty or None. The string is passed to ``self.interlacer.query``
    and its result is returned unchanged.
    """
    # An explicit query string is forwarded untouched.
    if modalities is not None:
        return self.interlacer.query(modalities)

    configured = self.modalities
    selection = ",".join(configured) if configured else "*"
    return self.interlacer.query(selection)