Skip to content

Autopipeline utils

autopipeline_utils #

Utility functions for the autopipeline module.

Functions:

Name Description
save_pipeline_reports

Save pipeline reports including success/failure reports and simplified index.

PipelineResults dataclass #

PipelineResults(
    successful_results: typing.List[
        imgtools.autopipeline_utils.ResultType
    ],
    failed_results: typing.List[
        imgtools.autopipeline_utils.ResultType
    ],
    all_results: typing.List[
        imgtools.autopipeline_utils.ResultType
    ],
    timestamp: str | None = None,
)

Bases: typing.Generic[imgtools.autopipeline_utils.ResultType]

Class to store and handle pipeline processing results.

This class stores successful and failed results from processing samples through the autopipeline and provides methods for saving reports and generating summary statistics.

Parameters:

Name Type Description Default

successful_results #

typing.List[imgtools.autopipeline_utils.ResultType]

List of successful processing results

required

failed_results #

typing.List[imgtools.autopipeline_utils.ResultType]

List of failed processing results

required

all_results #

typing.List[imgtools.autopipeline_utils.ResultType]

List of all processing results

required

timestamp #

str | None

Timestamp for this run, by default current datetime

None

Methods:

Name Description
log_summary

Log summary information about the results.

to_dict

Convert results to a dictionary.

failure_count property #

failure_count: int

Number of failed results.

success_count property #

success_count: int

Number of successful results.

success_rate property #

success_rate: float

Success rate as a percentage.

total_count property #

total_count: int

Total number of results.

log_summary #

log_summary() -> None

Log summary information about the results.

Source code in src/imgtools/autopipeline_utils.py
def log_summary(self) -> None:
    """Emit a one-line INFO summary of success/failure counts for this run."""
    # Build the message first, then hand it to the logger in one call.
    summary_message = (
        f"Processing complete. {self.success_count} successful, {self.failure_count} failed "
        f"out of {self.total_count} total samples ({self.success_rate:.1f}% success rate)."
    )
    logger.info(summary_message)

to_dict #

to_dict() -> typing.Dict[
    str,
    typing.List[imgtools.autopipeline_utils.ResultType],
]

Convert results to a dictionary.

Source code in src/imgtools/autopipeline_utils.py
def to_dict(self) -> Dict[str, List[ResultType]]:
    """Return the results grouped under the keys ``success`` and ``failure``."""
    # Keyword-argument construction yields the same string keys as a literal.
    return dict(
        success=self.successful_results,
        failure=self.failed_results,
    )

save_pipeline_reports #

save_pipeline_reports(
    results: imgtools.autopipeline_utils.PipelineResults,
    index_file: "Path",
    root_dir_name: str,
    simplified_columns: typing.List[str],
    index_lock_check_func: (
        typing.Callable[[], "Path"] | None
    ) = None,
) -> typing.Dict[str, "Path"]

Save pipeline reports including success/failure reports and simplified index.

Parameters:

Name Type Description Default

results #

imgtools.autopipeline_utils.PipelineResults

The pipeline results to save

required

index_file #

pathlib.Path

Path to the index file

required

root_dir_name #

str

Name of the root directory for output

required

simplified_columns #

typing.List[str]

List of columns to include in the simplified index

required

index_lock_check_func #

callable

Function to check and remove index lock file

None

Returns:

Type Description
typing.Dict[str, pathlib.Path]

Dictionary of saved file paths

Source code in src/imgtools/autopipeline_utils.py
def save_pipeline_reports(
    results: PipelineResults,
    index_file: "Path",
    root_dir_name: str,
    simplified_columns: List[str],
    index_lock_check_func: Callable[[], "Path"] | None = None,
) -> Dict[str, "Path"]:
    """
    Save pipeline reports including success/failure reports and simplified index.

    Parameters
    ----------
    results : PipelineResults
        The pipeline results to save
    index_file : Path
        Path to the index file
    root_dir_name : str
        Name of the root directory for output
    simplified_columns : List[str]
        List of columns to include in the simplified index
    index_lock_check_func : callable, optional
        Function to check and remove index lock file

    Returns
    -------
    Dict[str, Path]
        Dictionary of saved file paths
    """
    # Log summary
    results.log_summary()

    # Timestamped report files are written next to the index file.
    success_file = index_file.with_name(
        f"{root_dir_name}_successful_{results.timestamp}.json"
    )
    failure_file = index_file.with_name(
        f"{root_dir_name}_failed_{results.timestamp}.json"
    )

    # Write simplified index file
    simple_index = index_file.parent / f"{index_file.stem}-simple.csv"

    try:
        index_df = pd.read_csv(index_file)

        # Get columns in the order we want.
        # reindex (rather than plain __getitem__) fills any column missing
        # from index_df with NaN instead of raising a KeyError, matching the
        # documented intent.
        index_df = index_df.reindex(columns=simplified_columns)

        # Sort by 'filepath' to make it easier to read
        if "filepath" in index_df.columns:
            index_df = index_df.sort_values(by=["filepath"])

        index_df.to_csv(simple_index, index=False)
        logger.info(f"Index file saved to {simple_index}")
    except Exception as e:
        # Best-effort: a missing or malformed index file should not abort
        # writing the JSON reports below.
        logger.error(f"Failed to create simplified index: {e}")

    # Remove lockfile if a function was provided
    # TODO:: probably a better way to do this
    if index_lock_check_func is not None:
        lock_file = index_lock_check_func()
        if lock_file is not None and lock_file.exists():
            lock_file.unlink()
            logger.debug(f"Lock file removed: {lock_file}")

    # Convert results to dictionaries for JSON serialization
    success_dicts = [result.to_dict() for result in results.successful_results]

    # Write success report
    with success_file.open("w", encoding="utf-8") as f:
        json.dump(success_dicts, f, indent=2)
    logger.info(f"Detailed success report saved to {success_file}")

    saved_files = {"success_file": success_file, "simple_index": simple_index}

    # If no failures, we can skip writing the failure file
    if results.failure_count == 0:
        return saved_files

    # Write failure report
    failure_dicts = [result.to_dict() for result in results.failed_results]
    with failure_file.open("w", encoding="utf-8") as f:
        json.dump(failure_dicts, f, indent=2)
    logger.info(f"Detailed failure report saved to {failure_file}")

    saved_files["failure_file"] = failure_file
    return saved_files