Skip to content

Crawler

Crawler dataclass #

Crawler(
    dicom_dir: pathlib.Path,
    output_dir: pathlib.Path | None = None,
    dataset_name: str | None = None,
    n_jobs: int = 1,
    force: bool = False,
)

Crawl a DICOM directory and extract metadata.

Methods:

Name Description
crawl

Crawl the DICOM directory and extract metadata.

get_folder

Get the folder for a given series UID.

get_modality

Get the modality for a given series UID.

crawl_db property #

crawl_db: list[dict[str, str]]

Return the crawl database.

crawl_db_raw property #

crawl_db_raw: (
    imgtools.dicom.crawl.parse_dicoms.SeriesMetaMap
)

Return the raw crawl database.

crawl_results property #

crawl_results: (
    imgtools.dicom.crawl.parse_dicoms.ParseDicomDirResult
)

Get the crawl results, validating they're available first.

index property #

index: pandas.DataFrame

Return the index of the crawl results.

crawl #

crawl() -> None

Crawl the DICOM directory and extract metadata.

Source code in src/imgtools/dicom/crawl/crawler.py
def crawl(self) -> None:
    """Crawl the DICOM directory and cache the parsed results.

    If no output directory has been set, ``.imgtools`` inside the parent
    of ``dicom_dir`` is used and written back to ``self.output_dir``.
    The output directory is created (including parents) when missing.
    The value returned by ``parse_dicom_dir`` is stored on
    ``self._crawl_results``.
    """
    if not self.output_dir:
        # Fall back to a hidden folder next to the DICOM directory.
        self.output_dir = self.dicom_dir.parent / ".imgtools"
    target_dir = self.output_dir
    target_dir.mkdir(parents=True, exist_ok=True)

    logger.info(
        "Starting DICOM crawl.",
        dicom_dir=self.dicom_dir,
        output_dir=target_dir,
        dataset_name=self.dataset_name,
    )

    # Run the parse inside the tqdm/logging redirect context.
    with tqdm_logging_redirect():
        results = parse_dicom_dir(
            dicom_dir=self.dicom_dir,
            output_dir=target_dir,
            dataset_name=self.dataset_name,
            n_jobs=self.n_jobs,
            force=self.force,
        )

    self._crawl_results = results

get_folder #

get_folder(series_uid: str) -> str

Get the folder for a given series UID.

Source code in src/imgtools/dicom/crawl/crawler.py
def get_folder(self, series_uid: str) -> str:
    """Get the folder for a given series UID.

    The folder is read from the first sub-series entry recorded for the
    series in the raw crawl database.

    Parameters
    ----------
    series_uid : str
        Series UID to look up in ``self.crawl_results.crawl_db_raw``.

    Returns
    -------
    str
        The ``"folder"`` value of the first sub-series entry.

    Raises
    ------
    ValueError
        If the series UID is not present in the crawl results, or if it
        is present but has no sub-series entries.
    """
    # Read the property once; it is accessed only a single time per call.
    raw_db = self.crawl_results.crawl_db_raw
    if series_uid not in raw_db:
        msg = f"Series UID {series_uid} not found in crawl results."
        raise ValueError(msg)

    # A default avoids leaking a bare StopIteration on an empty mapping.
    first_subseries = next(iter(raw_db[series_uid].values()), None)
    if first_subseries is None:
        msg = f"Series UID {series_uid} has no subseries in crawl results."
        raise ValueError(msg)
    return first_subseries["folder"]

get_modality #

get_modality(series_uid: str) -> str

Get the modality for a given series UID.

Source code in src/imgtools/dicom/crawl/crawler.py
def get_modality(self, series_uid: str) -> str:
    """Get the modality for a given series UID.

    The modality is read from the first sub-series entry recorded for the
    series in the raw crawl database.

    Parameters
    ----------
    series_uid : str
        Series UID to look up in ``self.crawl_results.crawl_db_raw``.

    Returns
    -------
    str
        The ``"modality"`` value of the first sub-series entry.

    Raises
    ------
    ValueError
        If the series UID is not present in the crawl results, or if it
        is present but has no sub-series entries.
    """
    # Read the property once; it is accessed only a single time per call.
    raw_db = self.crawl_results.crawl_db_raw
    if series_uid not in raw_db:
        msg = f"Series UID {series_uid} not found in crawl results."
        raise ValueError(msg)

    # A default avoids leaking a bare StopIteration on an empty mapping.
    first_subseries = next(iter(raw_db[series_uid].values()), None)
    if first_subseries is None:
        msg = f"Series UID {series_uid} has no subseries in crawl results."
        raise ValueError(msg)
    return first_subseries["modality"]