# Source code for cleanvision.utils.serialize
from __future__ import annotations
import os
import pickle
import warnings
from copy import deepcopy
from typing import TYPE_CHECKING
import pandas as pd
import cleanvision
if TYPE_CHECKING: # pragma: no cover
from cleanvision import Imagelab
# Constants: names of the files written inside a saved Imagelab folder.
# Pickled Imagelab object (stored with its issues and issue_summary set to None).
OBJECT_FILENAME = "imagelab.pkl"
# CSV dump of imagelab.issues.
ISSUES_FILENAME = "issues.csv"
# CSV dump of imagelab.issue_summary.
ISSUE_SUMMARY_FILENAME = "issue_summary.csv"
class Serializer:
    """Saves an Imagelab object to a folder on disk and loads it back.

    A saved folder contains three files: the pickled Imagelab object
    (``imagelab.pkl``) plus its ``issues`` and ``issue_summary`` tables, which
    are written separately as CSV files (``issues.csv`` and
    ``issue_summary.csv``) and re-attached to the object on load.
    """

    @staticmethod
    def _save_issues(path: str, imagelab: Imagelab) -> None:
        """Saves the issues to disk.

        Writes ``imagelab.issues`` and ``imagelab.issue_summary`` as CSV files
        inside the folder ``path``.
        """
        issues_path = os.path.join(path, ISSUES_FILENAME)
        imagelab.issues.to_csv(issues_path)
        issue_summary_path = os.path.join(path, ISSUE_SUMMARY_FILENAME)
        imagelab.issue_summary.to_csv(issue_summary_path)

    @staticmethod
    def _validate_version(imagelab: Imagelab) -> None:
        """Warns if the Imagelab was saved by a different cleanvision version.

        Only a warning is emitted; nothing is raised on a mismatch.
        """
        current_version = cleanvision.__version__
        imagelab_version = imagelab.cleanvision_version
        if current_version != imagelab_version:
            warnings.warn(
                "Saved Imagelab was created using different version of cleanvision "
                f"({imagelab_version}) than current version ({current_version}). "
                "Things may be broken!"
            )

    @classmethod
    def serialize(cls, path: str, imagelab: Imagelab, force: bool) -> None:
        """Serializes the imagelab object to disk.

        Parameters
        ----------
        path : str
            Path of the folder to save the imagelab object to. The folder,
            including any missing parent folders, is created if it does not exist.
        imagelab : Imagelab
            The imagelab object to save.
        force : bool
            If True, will overwrite existing files at the specified path.

        Raises
        ------
        FileExistsError
            If `force` is set to False, and an existing path is specified for saving Imagelab instance.
        """
        if os.path.exists(path):
            if not force:
                raise FileExistsError("Please specify a new path or set force=True")
            print(
                f"WARNING: Existing files will be overwritten by newly saved files at: {path}"
            )
        else:
            # makedirs rather than mkdir, so a nested target such as "runs/exp1/lab"
            # works even when its parent folders have not been created yet.
            os.makedirs(path)

        # Save the issues to disk.
        cls._save_issues(path=path, imagelab=imagelab)

        # Clear issues and issue_summary on a copy: they are already stored as
        # CSV, and the caller's imagelab must keep its own tables intact.
        imagelab_copy = deepcopy(imagelab)
        imagelab_copy.issues = None
        imagelab_copy.issue_summary = None

        # Save the imagelab object to disk.
        with open(os.path.join(path, OBJECT_FILENAME), "wb") as f:
            pickle.dump(imagelab_copy, f)

        print(f"Saved Imagelab to folder: {path}")
        print(
            "The data path and dataset must be not be changed to maintain consistent state when loading this Imagelab"
        )

    @classmethod
    def deserialize(cls, path: str) -> Imagelab:
        """Deserializes the imagelab object from disk.

        Parameters
        ----------
        path : str
            Path to the saved Imagelab folder previously specified in :py:meth:`Serializer.serialize` (not the individual pickle file).

        Returns
        -------
        Imagelab
            The unpickled Imagelab with ``issues`` and ``issue_summary``
            reloaded from their CSV files.

        Raises
        ------
        ValueError
            If the path specified for imagelab folder does not exist
        """
        if not os.path.exists(path):
            raise ValueError(f"No folder found at specified path: {path}")

        # SECURITY NOTE: unpickling can execute arbitrary code. Only load
        # Imagelab folders that come from a trusted source.
        with open(os.path.join(path, OBJECT_FILENAME), "rb") as f:
            imagelab: Imagelab = pickle.load(f)
        cls._validate_version(imagelab)

        # Load the issues from disk. The first CSV column is the index that
        # to_csv wrote out when the Imagelab was saved.
        issues_path = os.path.join(path, ISSUES_FILENAME)
        imagelab.issues = pd.read_csv(issues_path, index_col=0)
        issue_summary_path = os.path.join(path, ISSUE_SUMMARY_FILENAME)
        imagelab.issue_summary = pd.read_csv(issue_summary_path, index_col=0)

        print("Successfully loaded Imagelab")
        return imagelab