Models

models #

RuleError #

Bases: Exception

Exception raised for invalid rules.

InvalidComparisonError #

Bases: imgnet.query.models.RuleError

Exception raised when a Rule has an invalid comparison type.

ValidQueryError #

Bases: Exception

Base exception for ValidQuery errors.

ModalitiesValidationError #

Bases: imgnet.query.models.ValidQueryError

Raised when modalities field validation fails.

CollectionsValidationError #

Bases: imgnet.query.models.ValidQueryError

Raised when collections field validation fails.

RulesValidationError #

Bases: imgnet.query.models.ValidQueryError

Raised when rules field validation fails.

RulesValidationParsingError #

Bases: imgnet.query.models.RulesValidationError

Raised when parsing a Rule from string fails.

Rule #

Bases: pydantic.BaseModel

Comparison rule between one DICOM tag and a value or list of values.

evaluate #

evaluate(dicom_element: dict) -> bool

Evaluate whether a DICOM metadata dict is accepted by this rule.

Source code in src/imgnet/query/models.py

def evaluate(self, dicom_element: dict) -> bool:  # noqa: PLR0911 PLR0912
    """Evaluate whether a DICOM metadata dict is accepted by this rule."""
    tag_value = dicom_element.get(self.tag)
    if tag_value is None:
        return False

    if isinstance(tag_value, str):
        if tag_value.strip().startswith(
            "["
        ) and tag_value.strip().endswith("]"):
            matches = re.findall(
                r"""(['"])(.*?)\1|([^'",\s\[\]]+)""", tag_value
            )
            tag_value = [m[1] if m[1] else m[2] for m in matches]
        else:
            tag_value = [tag_value.strip()]

    match self.comparison:
        case "==" | "=":
            patterns = self.value
            if isinstance(self.value, str):
                patterns = [self.value]
            for element in tag_value:
                for pattern in patterns:
                    if re.match(pattern, element):
                        return True
            return False

        case "!=":
            patterns = self.value
            if isinstance(self.value, str):
                patterns = [self.value]
            for element in tag_value:
                for pattern in patterns:
                    if re.match(pattern, element):
                        return False
            return True

        case ">" | "<" | ">=" | "<=":
            op_fn = NUMERIC_OPS[self.comparison]
            if isinstance(self.value, list):
                msg = f"{self.comparison} comparison only compatible with numeric arguments, not list."
                raise InvalidComparisonError(msg)
            for element in tag_value:
                if element == "" or element is None:
                    return False
                try:
                    if not op_fn(float(element), float(self.value)):
                        return False
                except ValueError as exc:
                    msg = (
                        f"'{self.comparison}' comparisons only support numeric values."
                        f"\nInput: {self.tag}: {tag_value}, {self.comparison} {self.value}"
                    )
                    raise RuleError(msg) from exc
            return True

    return False

mask #

mask(df: pandas.DataFrame) -> pandas.Series

Return a boolean Series over df: True for rows accepted by this rule.

Vectorized counterpart of evaluate for bulk filtering.

Source code in src/imgnet/query/models.py

def mask(self, df: pd.DataFrame) -> pd.Series:
    """Return a boolean Series over *df*: True for rows accepted by this rule.

    Vectorized counterpart of ``evaluate`` for bulk filtering.
    """
    col = df.get(self.tag)
    if col is None:
        return pd.Series(False, index=df.index)

    col = col.astype(str)

    match self.comparison:
        case "==" | "=":
            patterns = (
                [self.value] if isinstance(self.value, str) else self.value
            )
            combined = "|".join(f"(?:{p})" for p in patterns)
            return col.str.match(combined, na=False)

        case "!=":
            patterns = (
                [self.value] if isinstance(self.value, str) else self.value
            )
            combined = "|".join(f"(?:{p})" for p in patterns)
            return ~col.str.match(combined, na=False)

        case ">" | "<" | ">=" | "<=":
            if isinstance(self.value, list):
                msg = f"{self.comparison} comparison only compatible with numeric arguments, not list."
                raise InvalidComparisonError(msg)
            op_fn = NUMERIC_OPS[self.comparison]
            return op_fn(
                pd.to_numeric(col, errors="coerce"), float(self.value)
            ).fillna(False)

    return pd.Series(False, index=df.index)

ValidQuery #

Bases: pydantic.BaseModel

Pydantic model representing a Med-ImageNet query.

process #

process(store: imgnet.collections.store.IndexedDatasets) -> pandas.DataFrame

Return a DataFrame containing selected SeriesInstanceUID rows.

Source code in src/imgnet/query/models.py

def process(self, store: IndexedDatasets) -> pd.DataFrame:
    """Run this query against *store* and return the selected rows.

    Delegates to ``imgnet.query.engine.run_query`` and logs how many rows
    came back.

    Args:
        store: Indexed datasets to run the query against.

    Returns:
        DataFrame containing the selected SeriesInstanceUID rows.
    """
    # Function-scope import; presumably avoids a circular import between
    # the models and engine modules -- confirm before moving it.
    from imgnet.query.engine import run_query as _run

    selected = _run(self, store)
    logger.info(f"Found {len(selected)} matches to the query.")
    return selected