Skip to content

Autopipeline utils

autopipeline_utils #

Utility functions for the autopipeline module.

Functions:

Name Description
save_pipeline_reports

Save pipeline reports including success/failure reports and simplified index.

PipelineResults dataclass #

PipelineResults(
    successful_results: typing.List[
        imgtools.autopipeline_utils.ResultType
    ],
    failed_results: typing.List[
        imgtools.autopipeline_utils.ResultType
    ],
    all_results: typing.List[
        imgtools.autopipeline_utils.ResultType
    ],
    timestamp: str | None = None,
)

Bases: typing.Generic[imgtools.autopipeline_utils.ResultType]

Class to store and handle pipeline processing results.

This class stores successful and failed results from processing samples through the autopipeline and provides methods for saving reports and generating summary statistics.

Parameters:

Name Type Description Default

successful_results #

typing.List[imgtools.autopipeline_utils.ResultType]

List of successful processing results

required

failed_results #

typing.List[imgtools.autopipeline_utils.ResultType]

List of failed processing results

required

all_results #

typing.List[imgtools.autopipeline_utils.ResultType]

List of all processing results

required

timestamp #

str | None

Timestamp for this run, by default current datetime

None

Methods:

Name Description
log_summary

Log summary information about the results.

to_dict

Convert results to a dictionary.

failure_count property #

failure_count: int

Number of failed results.

success_count property #

success_count: int

Number of successful results.

success_rate property #

success_rate: float

Success rate as a percentage.

total_count property #

total_count: int

Total number of results.

log_summary #

log_summary() -> None

Log summary information about the results.

Source code in src/imgtools/autopipeline_utils.py
def log_summary(self) -> None:
    """Emit a one-line INFO summary of success/failure counts for this run."""
    # Build the message first, then hand it to the logger in one call.
    summary_message = (
        f"Processing complete. {self.success_count} successful, {self.failure_count} failed "
        f"out of {self.total_count} total samples ({self.success_rate:.1f}% success rate)."
    )
    logger.info(summary_message)

to_dict #

to_dict() -> typing.Dict[
    str,
    typing.List[imgtools.autopipeline_utils.ResultType],
]

Convert results to a dictionary.

Source code in src/imgtools/autopipeline_utils.py
def to_dict(self) -> Dict[str, List[ResultType]]:
    """Return the results grouped under the keys ``success`` and ``failure``."""
    # Keyword-argument construction yields the same string keys as a literal.
    return dict(
        success=self.successful_results,
        failure=self.failed_results,
    )

save_pipeline_reports #

save_pipeline_reports(
    results: imgtools.autopipeline_utils.PipelineResults,
    index_file: "Path",
    root_dir_name: str,
    simplified_columns: typing.List[str],
    index_lock_check_func: (
        typing.Callable[[], "Path"] | None
    ) = None,
) -> typing.Dict[str, "Path"]

Save pipeline reports including success/failure reports and simplified index.

Parameters:

Name Type Description Default

results #

imgtools.autopipeline_utils.PipelineResults

The pipeline results to save

required

index_file #

pathlib.Path

Path to the index file

required

root_dir_name #

str

Name of the root directory for output

required

simplified_columns #

typing.List[str]

List of columns to include in the simplified index

required

index_lock_check_func #

callable

Function to check and remove index lock file

None

Returns:

Type Description
typing.Dict[str, pathlib.Path]

Dictionary of saved file paths

Source code in src/imgtools/autopipeline_utils.py
def save_pipeline_reports(
    results: PipelineResults,
    index_file: "Path",
    root_dir_name: str,
    simplified_columns: List[str],
    index_lock_check_func: Callable[[], "Path"] | None = None,
) -> Dict[str, "Path"]:
    """
    Save pipeline reports including success/failure reports and simplified index.

    Parameters
    ----------
    results : PipelineResults
        The pipeline results to save
    index_file : Path
        Path to the index file
    root_dir_name : str
        Name of the root directory for output
    simplified_columns : List[str]
        List of columns to include in the simplified index
    index_lock_check_func : callable, optional
        Function to check and remove index lock file

    Returns
    -------
    Dict[str, Path]
        Dictionary of saved file paths
    """
    # Log summary
    results.log_summary()

    # Timestamped report files are written next to the index file.
    success_file = index_file.with_name(
        f"{root_dir_name}_successful_{results.timestamp}.json"
    )
    failure_file = index_file.with_name(
        f"{root_dir_name}_failed_{results.timestamp}.json"
    )

    # Write simplified index file
    simple_index = index_file.parent / f"{index_file.stem}-simple.csv"

    try:
        index_df = pd.read_csv(index_file)

        # Get columns in the order we want.
        # reindex (rather than plain __getitem__) fills any column missing
        # from index_df with NaN instead of raising a KeyError, matching the
        # documented intent.
        index_df = index_df.reindex(columns=simplified_columns)

        # Sort by 'filepath' to make it easier to read
        if "filepath" in index_df.columns:
            index_df = index_df.sort_values(by=["filepath"])

        index_df.to_csv(simple_index, index=False)
        logger.info(f"Index file saved to {simple_index}")
    except Exception as e:
        # Best-effort: a missing or malformed index file should not abort
        # writing the JSON reports below.
        logger.error(f"Failed to create simplified index: {e}")

    # Remove lockfile if a function was provided
    # TODO:: probably a better way to do this
    if index_lock_check_func is not None:
        lock_file = index_lock_check_func()
        if lock_file is not None and lock_file.exists():
            lock_file.unlink()
            logger.debug(f"Lock file removed: {lock_file}")

    # Convert results to dictionaries for JSON serialization
    success_dicts = [result.to_dict() for result in results.successful_results]

    # Write success report
    with success_file.open("w", encoding="utf-8") as f:
        json.dump(success_dicts, f, indent=2)
    logger.info(f"Detailed success report saved to {success_file}")

    saved_files = {"success_file": success_file, "simple_index": simple_index}

    # If no failures, we can skip writing the failure file
    if results.failure_count == 0:
        return saved_files

    # Write failure report
    failure_dicts = [result.to_dict() for result in results.failed_results]
    with failure_file.open("w", encoding="utf-8") as f:
        json.dump(failure_dicts, f, indent=2)
    logger.info(f"Detailed failure report saved to {failure_file}")

    saved_files["failure_file"] = failure_file
    return saved_files