Skip to content

Transforms (augmentations.transforms)

class CLAHE (clip_limit=4.0, tile_grid_size=(8, 8), always_apply=False, p=0.5) [view source on GitHub]

Apply Contrast Limited Adaptive Histogram Equalization to the input image.

Parameters:

Name Type Description
clip_limit Union[float, Tuple[float, float]]

upper threshold value for contrast limiting. If clip_limit is a single float value, the range will be (1, clip_limit). Default: (1, 4).

tile_grid_size Tuple[int, int]

size of grid for histogram equalization. Default: (8, 8).

p float

probability of applying the transform. Default: 0.5.

Targets

image

Image types: uint8

Source code in albumentations/augmentations/transforms.py
Python
class CLAHE(ImageOnlyTransform):
    """Apply Contrast Limited Adaptive Histogram Equalization (CLAHE) to the input image.

    Args:
        clip_limit: upper threshold value for contrast limiting.
            If clip_limit is a single float value, the range will be (1, clip_limit). Default: (1, 4).
        tile_grid_size: size of grid for histogram equalization. Default: (8, 8).
        p: probability of applying the transform. Default: 0.5.

    Targets:
        image

    Image types:
        uint8

    """

    class InitSchema(BaseTransformInitSchema):
        # Field declarations (with defaults) for the constructor arguments.
        clip_limit: OnePlusFloatRangeType = (1.0, 4.0)
        tile_grid_size: OnePlusIntRangeType = (8, 8)

    def __init__(
        self,
        clip_limit: ScaleFloatType = 4.0,
        tile_grid_size: Tuple[int, int] = (8, 8),
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        # NOTE(review): `cast` does not convert anything here; a scalar clip_limit is presumably
        # expanded to a (1, clip_limit) range before this point - confirm against InitSchema handling.
        self.clip_limit = cast(Tuple[float, float], clip_limit)
        self.tile_grid_size = tile_grid_size

    def apply(self, img: np.ndarray, clip_limit: float = 2, **params: Any) -> np.ndarray:
        """Run CLAHE on ``img`` using the given ``clip_limit`` and the configured tile grid size.

        Raises:
            TypeError: if ``img`` is neither recognised as an RGB image nor as a grayscale image.
        """
        supported = is_rgb_image(img) or is_grayscale_image(img)
        if not supported:
            raise TypeError("CLAHE transformation expects 1-channel or 3-channel images.")

        return F.clahe(img, clip_limit, self.tile_grid_size)

    def get_params(self) -> Dict[str, float]:
        """Sample ``clip_limit`` uniformly between the two ends of ``self.clip_limit``."""
        lower = self.clip_limit[0]
        upper = self.clip_limit[1]
        return {"clip_limit": random.uniform(lower, upper)}

    def get_transform_init_args_names(self) -> Tuple[str, str]:
        """Return the names of the constructor arguments that describe this transform."""
        init_arg_names = ("clip_limit", "tile_grid_size")
        return init_arg_names

apply (self, img, clip_limit=2, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, clip_limit: float = 2, **params: Any) -> np.ndarray:
    """Run CLAHE on ``img`` using ``clip_limit`` and ``self.tile_grid_size``.

    Raises:
        TypeError: if ``img`` is neither recognised as an RGB image nor as a grayscale image.
    """
    supported = is_rgb_image(img) or is_grayscale_image(img)
    if not supported:
        raise TypeError("CLAHE transformation expects 1-channel or 3-channel images.")

    return F.clahe(img, clip_limit, self.tile_grid_size)

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, float]:
    """Sample ``clip_limit`` uniformly between the two ends of ``self.clip_limit``."""
    lower = self.clip_limit[0]
    upper = self.clip_limit[1]
    sampled = random.uniform(lower, upper)
    return {"clip_limit": sampled}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str]:
    """Return the names of the constructor arguments that describe this transform."""
    init_arg_names = ("clip_limit", "tile_grid_size")
    return init_arg_names

class ChannelShuffle [view source on GitHub]

Randomly rearrange channels of the input RGB image.

Parameters:

Name Type Description
p

probability of applying the transform. Default: 0.5.

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class ChannelShuffle(ImageOnlyTransform):
    """Randomly rearrange channels of the input RGB image.

    Args:
        p: probability of applying the transform. Default: 0.5.

    Targets:
        image

    Image types:
        uint8, float32

    """

    @property
    def targets_as_params(self) -> List[str]:
        """Targets whose values are handed to ``get_params_dependent_on_targets``."""
        return ["image"]

    def apply(self, img: np.ndarray, channels_shuffled: Tuple[int, int, int] = (0, 1, 2), **params: Any) -> np.ndarray:
        """Reorder the channels of ``img`` according to ``channels_shuffled``."""
        return F.channel_shuffle(img, channels_shuffled)

    def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Build a shuffled channel order sized by the third dimension of ``params["image"]``."""
        num_channels = params["image"].shape[2]
        channel_order = random_utils.shuffle(list(range(num_channels)))
        return {"channels_shuffled": channel_order}

    def get_transform_init_args_names(self) -> Tuple[()]:
        """Return an empty tuple: no constructor arguments are listed for this transform."""
        return ()

targets_as_params: List[str] property readonly

Targets used to get params

apply (self, img, channels_shuffled=(0, 1, 2), **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, channels_shuffled: Tuple[int, int, int] = (0, 1, 2), **params: Any) -> np.ndarray:
    """Reorder the channels of ``img`` according to ``channels_shuffled``."""
    shuffled_img = F.channel_shuffle(img, channels_shuffled)
    return shuffled_img

get_params_dependent_on_targets (self, params)

Returns parameters dependent on targets. Dependent target is defined in self.targets_as_params

Source code in albumentations/augmentations/transforms.py
Python
def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
    """Build a shuffled channel order sized by the third dimension of ``params["image"]``."""
    num_channels = params["image"].shape[2]
    channel_order = random_utils.shuffle(list(range(num_channels)))
    return {"channels_shuffled": channel_order}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[()]:
    """Return an empty tuple: no constructor arguments are listed for this transform."""
    no_init_args = ()
    return no_init_args

class ChromaticAberration (primary_distortion_limit=(-0.02, 0.02), secondary_distortion_limit=(-0.05, 0.05), mode='green_purple', interpolation=1, always_apply=False, p=0.5) [view source on GitHub]

Add lateral chromatic aberration by distorting the red and blue channels of the input image.

Parameters:

Name Type Description
primary_distortion_limit Union[float, Tuple[float, float]]

range of the primary radial distortion coefficient. If primary_distortion_limit is a single float value, the range will be (-primary_distortion_limit, primary_distortion_limit). Controls the distortion in the center of the image (positive values result in pincushion distortion, negative values result in barrel distortion). Default: (-0.02, 0.02).

secondary_distortion_limit Union[float, Tuple[float, float]]

range of the secondary radial distortion coefficient. If secondary_distortion_limit is a single float value, the range will be (-secondary_distortion_limit, secondary_distortion_limit). Controls the distortion in the corners of the image (positive values result in pincushion distortion, negative values result in barrel distortion). Default: (-0.05, 0.05).

mode Literal['green_purple', 'red_blue', 'random']

type of color fringing. Supported modes are 'green_purple', 'red_blue' and 'random'. 'random' will choose one of the modes 'green_purple' or 'red_blue' randomly. Default: 'green_purple'.

interpolation int

flag that is used to specify the interpolation algorithm. Should be one of: cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4. Default: cv2.INTER_LINEAR.

p float

probability of applying the transform. Default: 0.5.

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class ChromaticAberration(ImageOnlyTransform):
    """Add lateral chromatic aberration by distorting the red and blue channels of the input image.

    Args:
        primary_distortion_limit: range of the primary radial distortion coefficient.
            If primary_distortion_limit is a single float value, the range will be
            (-primary_distortion_limit, primary_distortion_limit).
            Controls the distortion in the center of the image (positive values result in pincushion distortion,
            negative values result in barrel distortion).
            Default: (-0.02, 0.02).
        secondary_distortion_limit: range of the secondary radial distortion coefficient.
            If secondary_distortion_limit is a single float value, the range will be
            (-secondary_distortion_limit, secondary_distortion_limit).
            Controls the distortion in the corners of the image (positive values result in pincushion distortion,
            negative values result in barrel distortion).
            Default: (-0.05, 0.05).
        mode: type of color fringing.
            Supported modes are 'green_purple', 'red_blue' and 'random'.
            'random' will choose one of the modes 'green_purple' or 'red_blue' randomly.
            Default: 'green_purple'.
        interpolation: flag that is used to specify the interpolation algorithm. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        p: probability of applying the transform.
            Default: 0.5.

    Targets:
        image

    Image types:
        uint8, float32

    """

    class InitSchema(BaseTransformInitSchema):
        # Field declarations (with defaults) for the constructor arguments.
        primary_distortion_limit: SymmetricRangeType = (-0.02, 0.02)
        secondary_distortion_limit: SymmetricRangeType = (-0.05, 0.05)
        mode: ChromaticAberrationMode = Field(default="green_purple", description="Type of color fringing.")
        interpolation: InterpolationType = cv2.INTER_LINEAR

    def __init__(
        self,
        primary_distortion_limit: ScaleFloatType = (-0.02, 0.02),
        secondary_distortion_limit: ScaleFloatType = (-0.05, 0.05),
        mode: ChromaticAberrationMode = "green_purple",
        interpolation: int = cv2.INTER_LINEAR,
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        self.primary_distortion_limit = cast(Tuple[float, float], primary_distortion_limit)
        self.secondary_distortion_limit = cast(Tuple[float, float], secondary_distortion_limit)
        self.mode = mode
        self.interpolation = interpolation

    def apply(
        self,
        img: np.ndarray,
        primary_distortion_red: float = -0.02,
        secondary_distortion_red: float = -0.05,
        primary_distortion_blue: float = -0.02,
        secondary_distortion_blue: float = -0.05,
        **params: Any,
    ) -> np.ndarray:
        """Distort ``img`` with the given red/blue distortion coefficients and the configured interpolation."""
        interpolation = cast(int, self.interpolation)
        return F.chromatic_aberration(
            img,
            primary_distortion_red,
            secondary_distortion_red,
            primary_distortion_blue,
            secondary_distortion_blue,
            interpolation,
        )

    def get_params(self) -> Dict[str, float]:
        """Sample primary and secondary distortion coefficients for the red and blue channels."""
        primary_limit = self.primary_distortion_limit
        secondary_limit = self.secondary_distortion_limit

        # Sampling order: red primary, red secondary, blue primary, blue secondary.
        red_primary = random_utils.uniform(*primary_limit)
        red_secondary = random_utils.uniform(*secondary_limit)
        blue_primary = random_utils.uniform(*primary_limit)
        blue_secondary = random_utils.uniform(*secondary_limit)

        # Within each channel the secondary coefficient takes the sign of the primary one.
        red_secondary = self._match_sign(red_primary, red_secondary)
        blue_secondary = self._match_sign(blue_primary, blue_secondary)

        if self.mode == "green_purple":
            # distortion coefficients of the red and blue channels have the same sign
            blue_primary = self._match_sign(red_primary, blue_primary)
            blue_secondary = self._match_sign(red_secondary, blue_secondary)
        elif self.mode == "red_blue":
            # distortion coefficients of the red and blue channels have the opposite sign
            blue_primary = self._unmatch_sign(red_primary, blue_primary)
            blue_secondary = self._unmatch_sign(red_secondary, blue_secondary)
        # Any other mode leaves the blue coefficients exactly as sampled above.

        return {
            "primary_distortion_red": red_primary,
            "secondary_distortion_red": red_secondary,
            "primary_distortion_blue": blue_primary,
            "secondary_distortion_blue": blue_secondary,
        }

    @staticmethod
    def _match_sign(a: float, b: float) -> float:
        """Return ``b`` negated when ``a`` and ``b`` have strictly opposite signs, otherwise ``b`` unchanged."""
        opposite_signs = (a < 0 < b) or (a > 0 > b)
        return -b if opposite_signs else b

    @staticmethod
    def _unmatch_sign(a: float, b: float) -> float:
        """Return ``b`` negated when ``a`` and ``b`` are both negative or both positive, otherwise ``b``."""
        same_sign = (a < 0 and b < 0) or (a > 0 and b > 0)
        return -b if same_sign else b

    def get_transform_init_args_names(self) -> Tuple[str, str, str, str]:
        """Return the names of the constructor arguments that describe this transform."""
        return ("primary_distortion_limit", "secondary_distortion_limit", "mode", "interpolation")

apply (self, img, primary_distortion_red=-0.02, secondary_distortion_red=-0.05, primary_distortion_blue=-0.02, secondary_distortion_blue=-0.05, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(
    self,
    img: np.ndarray,
    primary_distortion_red: float = -0.02,
    secondary_distortion_red: float = -0.05,
    primary_distortion_blue: float = -0.02,
    secondary_distortion_blue: float = -0.05,
    **params: Any,
) -> np.ndarray:
    """Distort ``img`` with the given red/blue distortion coefficients and ``self.interpolation``."""
    interpolation = cast(int, self.interpolation)
    distorted = F.chromatic_aberration(
        img,
        primary_distortion_red,
        secondary_distortion_red,
        primary_distortion_blue,
        secondary_distortion_blue,
        interpolation,
    )
    return distorted

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, float]:
    """Sample primary and secondary distortion coefficients for the red and blue channels."""
    primary_limit = self.primary_distortion_limit
    secondary_limit = self.secondary_distortion_limit

    # Sampling order: red primary, red secondary, blue primary, blue secondary.
    red_primary = random_utils.uniform(*primary_limit)
    red_secondary = random_utils.uniform(*secondary_limit)
    blue_primary = random_utils.uniform(*primary_limit)
    blue_secondary = random_utils.uniform(*secondary_limit)

    # Within each channel the secondary coefficient takes the sign of the primary one.
    red_secondary = self._match_sign(red_primary, red_secondary)
    blue_secondary = self._match_sign(blue_primary, blue_secondary)

    if self.mode == "green_purple":
        # distortion coefficients of the red and blue channels have the same sign
        blue_primary = self._match_sign(red_primary, blue_primary)
        blue_secondary = self._match_sign(red_secondary, blue_secondary)
    elif self.mode == "red_blue":
        # distortion coefficients of the red and blue channels have the opposite sign
        blue_primary = self._unmatch_sign(red_primary, blue_primary)
        blue_secondary = self._unmatch_sign(red_secondary, blue_secondary)
    # Any other mode leaves the blue coefficients exactly as sampled above.

    return {
        "primary_distortion_red": red_primary,
        "secondary_distortion_red": red_secondary,
        "primary_distortion_blue": blue_primary,
        "secondary_distortion_blue": blue_secondary,
    }

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str, str, str]:
    """Return the names of the constructor arguments that describe this transform."""
    init_arg_names = ("primary_distortion_limit", "secondary_distortion_limit", "mode", "interpolation")
    return init_arg_names

class ColorJitter (brightness=(0.8, 1), contrast=(0.8, 1), saturation=(0.8, 1), hue=(-0.5, 0.5), always_apply=False, p=0.5) [view source on GitHub]

Randomly changes the brightness, contrast, saturation, and hue of an image. Compared to ColorJitter from torchvision, this transform gives slightly different results because Pillow (used in torchvision) and OpenCV (used in Albumentations) convert an image to HSV format using different formulas. Another difference: Pillow uses uint8 overflow, whereas Albumentations uses value saturation.

Parameters:

Name Type Description
brightness float or tuple of float (min, max)

How much to jitter brightness. If float: brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness]. If Tuple[float, float]: brightness_factor is sampled from that range. Both values should be non-negative numbers.

contrast float or tuple of float (min, max)

How much to jitter contrast. If float: contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast]. If Tuple[float, float]: contrast_factor is sampled from that range. Both values should be non-negative numbers.

saturation float or tuple of float (min, max)

How much to jitter saturation. If float: saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation]. If Tuple[float, float]: saturation_factor is sampled from that range. Both values should be non-negative numbers.

hue float or tuple of float (min, max)

How much to jitter hue. If float: hue_factor is chosen uniformly from [-hue, hue]. Should have 0 <= hue <= 0.5. If Tuple[float, float]: hue_factor is sampled from that range. Both values should be in range [-0.5, 0.5].

Source code in albumentations/augmentations/transforms.py
Python
class ColorJitter(ImageOnlyTransform):
    """Randomly changes the brightness, contrast, saturation and hue of an image.

    Compared to ColorJitter from torchvision, this transform gives a little bit different results because
    Pillow (used in torchvision) and OpenCV (used in Albumentations) transform an image to HSV format by
    different formulas. Another difference - Pillow uses uint8 overflow, but we use value saturation.

    Args:
        brightness (float or tuple of float (min, max)): How much to jitter brightness.
            If float:
                brightness_factor is chosen uniformly from [max(0, 1 - brightness), 1 + brightness].
            If Tuple[float, float]: brightness_factor is sampled from that range.
            Both values should be non negative numbers.
        contrast (float or tuple of float (min, max)): How much to jitter contrast.
            If float:
                contrast_factor is chosen uniformly from [max(0, 1 - contrast), 1 + contrast].
            If Tuple[float, float]: contrast_factor is sampled from that range.
            Both values should be non negative numbers.
        saturation (float or tuple of float (min, max)): How much to jitter saturation.
            If float:
                saturation_factor is chosen uniformly from [max(0, 1 - saturation), 1 + saturation].
            If Tuple[float, float]: saturation_factor is sampled from that range.
            Both values should be non negative numbers.
        hue (float or tuple of float (min, max)): How much to jitter hue.
            If float:
                hue_factor is chosen uniformly from [-hue, hue]. Should have 0 <= hue <= 0.5.
            If Tuple[float, float]: hue_factor is sampled from that range.
            Both values should be in range [-0.5, 0.5].
        p (float): probability of applying the transform. Default: 0.5.

    """

    class InitSchema(BaseTransformInitSchema):
        # NOTE(review): these schema defaults (0.2) differ from the __init__ defaults below
        # ((0.8, 1) and (-0.5, 0.5)) - confirm which ones are meant to be authoritative.
        brightness: Annotated[ScaleFloatType, Field(default=0.2, description="Range for jittering brightness.")]
        contrast: Annotated[ScaleFloatType, Field(default=0.2, description="Range for jittering contrast.")]
        saturation: Annotated[ScaleFloatType, Field(default=0.2, description="Range for jittering saturation.")]
        hue: Annotated[ScaleFloatType, Field(default=0.2, description="Range for jittering hue.")]

        @field_validator("brightness", "contrast", "saturation", "hue")
        @classmethod
        def check_ranges(cls, value: ScaleFloatType, info: ValidationInfo) -> Tuple[float, float]:
            """Normalise a jitter argument to a ``(min, max)`` range and validate it.

            A single number ``v`` becomes ``(bias - v, bias + v)`` where ``bias`` is 0 for hue and 1 for the
            other fields; for the latter the lower end is clipped at 0. Two-element tuples/lists are checked
            against the field bounds with ``check_range``.

            Raises:
                ValueError: if a single number is negative, or if the range derived from it would exceed
                    the upper bound of the field (for hue: a value above 0.5).
            """
            if info.field_name == "hue":
                bounds = -0.5, 0.5
                bias = 0
                clip = False
            elif info.field_name in ["brightness", "contrast", "saturation"]:
                bounds = 0, float("inf")
                bias = 1
                clip = True

            if isinstance(value, numbers.Number):
                if value < 0:
                    raise ValueError(f"If {info.field_name} is a single number, it must be non negative.")
                # The derived range must obey the same bounds as an explicit tuple; without this check a
                # scalar such as hue=0.7 would silently bypass the documented [-0.5, 0.5] limit.
                if bias + value > bounds[1]:
                    raise ValueError(
                        f"If {info.field_name} is a single number, it must not exceed {bounds[1] - bias}.",
                    )
                lower = bias - value
                if clip:
                    lower = max(lower, 0)
                # Build a tuple so the result matches the declared return type.
                value = (lower, bias + value)
            elif isinstance(value, (tuple, list)) and len(value) == PAIR:
                check_range(value, *bounds, info.field_name)

            return cast(Tuple[float, float], value)

    def __init__(
        self,
        brightness: ScaleFloatType = (0.8, 1),
        contrast: ScaleFloatType = (0.8, 1),
        saturation: ScaleFloatType = (0.8, 1),
        hue: ScaleFloatType = (-0.5, 0.5),
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)

        self.brightness = cast(Tuple[float, float], brightness)
        self.contrast = cast(Tuple[float, float], contrast)
        self.saturation = cast(Tuple[float, float], saturation)
        self.hue = cast(Tuple[float, float], hue)

        # Index i of this list pairs with index i of the factor list built in `apply`.
        self.transforms = [
            F.adjust_brightness_torchvision,
            F.adjust_contrast_torchvision,
            F.adjust_saturation_torchvision,
            F.adjust_hue_torchvision,
        ]

    def get_params(self) -> Dict[str, Any]:
        """Sample one factor per adjustment plus a random order in which to apply the adjustments."""
        brightness = random.uniform(self.brightness[0], self.brightness[1])
        contrast = random.uniform(self.contrast[0], self.contrast[1])
        saturation = random.uniform(self.saturation[0], self.saturation[1])
        hue = random.uniform(self.hue[0], self.hue[1])

        order = [0, 1, 2, 3]
        order = random_utils.shuffle(order)

        return {
            "brightness": brightness,
            "contrast": contrast,
            "saturation": saturation,
            "hue": hue,
            "order": order,
        }

    def apply(
        self,
        img: np.ndarray,
        brightness: float = 1.0,
        contrast: float = 1.0,
        saturation: float = 1.0,
        hue: float = 0,
        order: Optional[List[int]] = None,
        **params: Any,
    ) -> np.ndarray:
        """Apply the four adjustments to ``img`` in the sequence given by ``order``.

        ``order`` holds indices into ``self.transforms``; when it is None the adjustments run as
        brightness, contrast, saturation, hue.

        Raises:
            TypeError: if ``img`` is neither recognised as an RGB image nor as a grayscale image.
        """
        if order is None:
            order = [0, 1, 2, 3]
        if not is_rgb_image(img) and not is_grayscale_image(img):
            msg = "ColorJitter transformation expects 1-channel or 3-channel images."
            raise TypeError(msg)
        color_transforms = [brightness, contrast, saturation, hue]
        for i in order:
            img = self.transforms[i](img, color_transforms[i])  # type: ignore[operator]
        return img

    def get_transform_init_args_names(self) -> Tuple[str, str, str, str]:
        """Return the names of the constructor arguments that describe this transform."""
        return ("brightness", "contrast", "saturation", "hue")

apply (self, img, brightness=1.0, contrast=1.0, saturation=1.0, hue=0, order=None, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(
    self,
    img: np.ndarray,
    brightness: float = 1.0,
    contrast: float = 1.0,
    saturation: float = 1.0,
    hue: float = 0,
    order: Optional[List[int]] = None,
    **params: Any,
) -> np.ndarray:
    """Apply the four adjustments to ``img`` in the sequence given by ``order``.

    ``order`` holds indices into ``self.transforms``; when it is None the indices 0, 1, 2, 3 are used.

    Raises:
        TypeError: if ``img`` is neither recognised as an RGB image nor as a grayscale image.
    """
    if order is None:
        order = [0, 1, 2, 3]
    if not (is_rgb_image(img) or is_grayscale_image(img)):
        raise TypeError("ColorJitter transformation expects 1-channel or 3-channel images.")

    # Position i here pairs with self.transforms[i].
    factors = (brightness, contrast, saturation, hue)
    for idx in order:
        adjust = self.transforms[idx]
        img = adjust(img, factors[idx])  # type: ignore[operator]
    return img

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, Any]:
    """Sample one factor per adjustment plus a random order in which to apply the adjustments."""
    limits_by_name = (
        ("brightness", self.brightness),
        ("contrast", self.contrast),
        ("saturation", self.saturation),
        ("hue", self.hue),
    )
    # Factors are drawn in the listed sequence, before the order is shuffled.
    sampled: Dict[str, Any] = {name: random.uniform(limits[0], limits[1]) for name, limits in limits_by_name}
    sampled["order"] = random_utils.shuffle([0, 1, 2, 3])
    return sampled

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str, str, str]:
    """Return the names of the constructor arguments that describe this transform."""
    init_arg_names = ("brightness", "contrast", "saturation", "hue")
    return init_arg_names

class Downscale (scale_min=0.25, scale_max=0.25, interpolation=None, always_apply=False, p=0.5) [view source on GitHub]

Decreases image quality by downscaling and upscaling back.

Parameters:

Name Type Description
scale_min float

lower bound on the image scale. Should be <= scale_max.

scale_max float

upper bound on the image scale. Should be < 1.

interpolation Union[int, albumentations.core.transforms_interface.Interpolation, Dict[str, int]]

cv2 interpolation method. Could be: (1) a single cv2 interpolation flag - the selected method will be used for both downscale and upscale; (2) dict(downscale=flag, upscale=flag); (3) Downscale.Interpolation(downscale=flag, upscale=flag). Default: Interpolation(downscale=cv2.INTER_NEAREST, upscale=cv2.INTER_NEAREST).

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class Downscale(ImageOnlyTransform):
    """Decreases image quality by downscaling and upscaling back.

    Args:
        scale_min: lower bound on the image scale. Should be <= scale_max.
        scale_max: upper bound on the image scale. Should be < 1.
        interpolation: cv2 interpolation method. Could be:
            - single cv2 interpolation flag - selected method will be used for downscale and upscale.
            - dict(downscale=flag, upscale=flag)
            - Downscale.Interpolation(downscale=flag, upscale=flag)
            Default: Interpolation(downscale=cv2.INTER_NEAREST, upscale=cv2.INTER_NEAREST)

    Targets:
        image

    Image types:
        uint8, float32

    """

    class InitSchema(BaseTransformInitSchema):
        scale_min: float = Field(default=0.25, ge=0, le=1, description="Lower bound on the image scale.")
        scale_max: float = Field(default=0.25, ge=0, lt=1, description="Upper bound on the image scale.")
        interpolation: Optional[Union[int, Interpolation, Dict[str, int]]] = Field(
            default_factory=lambda: Interpolation(downscale=cv2.INTER_NEAREST, upscale=cv2.INTER_NEAREST),
            description="CV2 interpolation method or a dictionary specifying downscale and upscale methods.",
        )

        @model_validator(mode="after")
        def validate_scale(self) -> Self:
            """Reject configurations where ``scale_min`` is greater than ``scale_max``."""
            if self.scale_min > self.scale_max:
                raise ValueError("scale_min must be less than or equal to scale_max")
            return self

        @field_validator("interpolation")
        @classmethod
        def set_interpolation(cls, v: Any) -> Interpolation:
            """Convert every accepted form of ``interpolation`` into an ``Interpolation`` instance.

            Raises:
                ValueError: if ``v`` is not a dict, an int, an ``Interpolation`` or None.
            """
            if isinstance(v, dict):
                # Keys of the dict are passed on as keyword arguments.
                normalized = Interpolation(**v)
            elif isinstance(v, int):
                # A single flag is used for both directions.
                normalized = Interpolation(downscale=v, upscale=v)
            elif isinstance(v, Interpolation):
                normalized = v
            elif v is None:
                normalized = Interpolation(downscale=cv2.INTER_NEAREST, upscale=cv2.INTER_NEAREST)
            else:
                raise ValueError(
                    "Interpolation must be an int, Interpolation instance, "
                    "or dict specifying downscale and upscale methods."
                )
            return normalized

    def __init__(
        self,
        scale_min: float = 0.25,
        scale_max: float = 0.25,
        interpolation: Optional[Union[int, Interpolation, Dict[str, int]]] = None,
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        self.scale_min = scale_min
        self.scale_max = scale_max
        self.interpolation = cast(Interpolation, interpolation)

    def apply(self, img: np.ndarray, scale: float, **params: Any) -> np.ndarray:
        """Downscale ``img`` by ``scale`` and scale it back using the configured interpolation pair.

        Raises:
            TypeError: if ``self.interpolation`` is still a plain int.
        """
        interpolation = self.interpolation
        if isinstance(interpolation, int):
            raise TypeError("Should not be here, added for typing purposes. Please report this issue.")
        return F.downscale(
            img,
            scale=scale,
            down_interpolation=interpolation.downscale,
            up_interpolation=interpolation.upscale,
        )

    def get_params(self) -> Dict[str, Any]:
        """Sample ``scale`` uniformly between ``scale_min`` and ``scale_max``."""
        sampled_scale = random.uniform(self.scale_min, self.scale_max)
        return {"scale": sampled_scale}

    def get_transform_init_args_names(self) -> Tuple[str, str]:
        """Return the constructor argument names; ``interpolation`` is added in ``to_dict_private``."""
        return ("scale_min", "scale_max")

    def to_dict_private(self) -> Dict[str, Any]:
        """Extend the parent's dict with the upscale/downscale interpolation flags.

        Raises:
            TypeError: if ``self.interpolation`` is still a plain int.
        """
        interpolation = self.interpolation
        if isinstance(interpolation, int):
            raise TypeError("Should not be here, added for typing purposes. Please report this issue.")
        state = super().to_dict_private()
        state["interpolation"] = {"upscale": interpolation.upscale, "downscale": interpolation.downscale}
        return state

apply (self, img, scale, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, scale: float, **params: Any) -> np.ndarray:
    """Downscale ``img`` by ``scale`` and scale it back using the configured interpolation pair.

    Raises:
        TypeError: if ``self.interpolation`` is still a plain int.
    """
    interpolation = self.interpolation
    if isinstance(interpolation, int):
        raise TypeError("Should not be here, added for typing purposes. Please report this issue.")
    return F.downscale(
        img,
        scale=scale,
        down_interpolation=interpolation.downscale,
        up_interpolation=interpolation.upscale,
    )

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, Any]:
    """Sample ``scale`` uniformly between ``self.scale_min`` and ``self.scale_max``."""
    sampled_scale = random.uniform(self.scale_min, self.scale_max)
    return {"scale": sampled_scale}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str]:
    """Return the names of the constructor arguments that describe this transform."""
    init_arg_names = ("scale_min", "scale_max")
    return init_arg_names

class Emboss (alpha=(0.2, 0.5), strength=(0.2, 0.7), always_apply=False, p=0.5) [view source on GitHub]

Emboss the input image and overlays the result with the original image.

Parameters:

Name Type Description
alpha Tuple[float, float]

range to choose the visibility of the embossed image. At 0, only the original image is visible; at 1.0, only its embossed version is visible. Default: (0.2, 0.5).

strength Tuple[float, float]

strength range of the embossing. Default: (0.2, 0.7).

p float

probability of applying the transform. Default: 0.5.

Targets

image

Source code in albumentations/augmentations/transforms.py
Python
class Emboss(ImageOnlyTransform):
    """Emboss the input image and overlays the result with the original image.

    Args:
        alpha: range to choose the visibility of the embossed image. At 0, only the original image is
            visible, at 1.0 only its embossed version is visible. Default: (0.2, 0.5).
        strength: strength range of the embossing. Default: (0.2, 0.7).
        p: probability of applying the transform. Default: 0.5.

    Targets:
        image

    """

    class InitSchema(BaseTransformInitSchema):
        # Field declarations (with defaults) for the constructor arguments.
        alpha: ZeroOneRangeType = (0.2, 0.5)
        strength: NonNegativeFloatRangeType = (0.2, 0.7)

    def __init__(
        self,
        alpha: Tuple[float, float] = (0.2, 0.5),
        strength: Tuple[float, float] = (0.2, 0.7),
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        self.alpha = alpha
        self.strength = strength

    @staticmethod
    def __generate_emboss_matrix(alpha_sample: np.ndarray, strength_sample: np.ndarray) -> np.ndarray:
        """Blend an identity 3x3 kernel with an emboss 3x3 kernel, weighted by ``alpha_sample``."""
        identity_kernel = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]], dtype=np.float32)
        emboss_kernel = np.array(
            [
                [-1 - strength_sample, 0 - strength_sample, 0],
                [0 - strength_sample, 1, 0 + strength_sample],
                [0, 0 + strength_sample, 1 + strength_sample],
            ],
            dtype=np.float32,
        )
        # alpha_sample == 0 yields the identity kernel, alpha_sample == 1 the pure emboss kernel.
        return (1 - alpha_sample) * identity_kernel + alpha_sample * emboss_kernel

    def get_params(self) -> Dict[str, np.ndarray]:
        """Sample alpha and strength (in that order) and build the matching convolution kernel."""
        alpha_value = random.uniform(*self.alpha)
        strength_value = random.uniform(*self.strength)
        kernel = self.__generate_emboss_matrix(alpha_sample=alpha_value, strength_sample=strength_value)
        return {"emboss_matrix": kernel}

    def apply(self, img: np.ndarray, emboss_matrix: Optional[np.ndarray] = None, **params: Any) -> np.ndarray:
        """Convolve ``img`` with ``emboss_matrix``."""
        return F.convolve(img, emboss_matrix)

    def get_transform_init_args_names(self) -> Tuple[str, str]:
        """Return the names of the constructor arguments that describe this transform."""
        return ("alpha", "strength")

apply (self, img, emboss_matrix=None, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, emboss_matrix: Optional[np.ndarray] = None, **params: Any) -> np.ndarray:
    """Convolve the image with the emboss kernel.

    Args:
        img: input image.
        emboss_matrix: kernel handed to `F.convolve`.
        **params: extra parameters, not used here.

    """
    # NOTE(review): the None default is forwarded to F.convolve unchanged —
    # confirm callers always supply the kernel.
    return F.convolve(img, emboss_matrix)

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, np.ndarray]:
    """Draw a blend factor and a strength, then build the matching emboss kernel.

    Returns:
        Dict with the single key "emboss_matrix" holding the sampled kernel.

    """
    sampled_alpha = random.uniform(*self.alpha)
    sampled_strength = random.uniform(*self.strength)
    return {
        "emboss_matrix": self.__generate_emboss_matrix(
            alpha_sample=sampled_alpha,
            strength_sample=sampled_strength,
        ),
    }

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str]:
    """Return the names of the constructor arguments that describe this transform."""
    return ("alpha", "strength")

class Equalize (mode='cv', by_channels=True, mask=None, mask_params=(), always_apply=False, p=0.5) [view source on GitHub]

Equalize the image histogram.

Parameters:

Name Type Description
mode str

{'cv', 'pil'}. Use OpenCV or Pillow equalization method.

by_channels bool

If True, use equalization by channels separately, else convert image to YCbCr representation and use equalization by Y channel.

mask np.ndarray, callable

If given, only the pixels selected by the mask are included in the analysis. May be a 1-channel or 3-channel array, or a callable. The callable's signature must include an image argument.

mask_params list of str

Params for mask function.

Targets

image

Image types: uint8

Source code in albumentations/augmentations/transforms.py
Python
class Equalize(ImageOnlyTransform):
    """Equalize the histogram of the input image.

    Args:
        mode (str): {'cv', 'pil'}. Selects the OpenCV or the Pillow equalization method.
        by_channels (bool): If True, every channel is equalized separately; otherwise the image
            is converted to YCbCr and only the `Y` channel is equalized.
        mask (np.ndarray, callable): If given, only the pixels selected by the mask take part
            in the analysis. May be a 1-channel or 3-channel array, or a callable whose
            signature includes an `image` argument.
        mask_params (list of str): Names of extra targets passed to the mask function.

    Targets:
        image

    Image types:
        uint8

    """

    class InitSchema(BaseTransformInitSchema):
        mode: ImageMode = "cv"
        by_channels: Annotated[bool, Field(default=True, description="Equalize channels separately if True")]
        mask: Annotated[
            Optional[Union[np.ndarray, Callable[..., Any]]],
            Field(default=None, description="Mask to apply for equalization"),
        ]
        mask_params: Annotated[Sequence[str], Field(default=[], description="Parameters for mask function")]

    def __init__(
        self,
        mode: ImageMode = "cv",
        by_channels: bool = True,
        mask: Optional[Union[np.ndarray, Callable[..., Any]]] = None,
        mask_params: Sequence[str] = (),
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)

        self.mask_params = mask_params
        self.mask = mask
        self.by_channels = by_channels
        self.mode = mode

    def apply(self, img: np.ndarray, mask: Optional[np.ndarray] = None, **params: Any) -> np.ndarray:
        """Equalize `img` using the configured mode and the resolved mask."""
        return F.equalize(img, mode=self.mode, by_channels=self.by_channels, mask=mask)

    def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Resolve the mask: call it with the targets when it is callable, else pass it through."""
        resolved_mask = self.mask(**params) if callable(self.mask) else self.mask
        return {"mask": resolved_mask}

    @property
    def targets_as_params(self) -> List[str]:
        """Targets handed to `get_params_dependent_on_targets`: the image plus `mask_params`."""
        return ["image", *self.mask_params]

    def get_transform_init_args_names(self) -> Tuple[str, ...]:
        """Return the names of the constructor arguments that describe this transform."""
        return ("mode", "by_channels", "mask", "mask_params")

targets_as_params: List[str] property readonly

Targets used to get params

apply (self, img, mask=None, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, mask: Optional[np.ndarray] = None, **params: Any) -> np.ndarray:
    """Equalize `img` via `F.equalize`, using the instance's mode and by_channels settings.

    Args:
        img: input image.
        mask: optional mask forwarded to `F.equalize`.
        **params: extra parameters, not used here.

    """
    return F.equalize(img, mode=self.mode, by_channels=self.by_channels, mask=mask)

get_params_dependent_on_targets (self, params)

Returns parameters dependent on targets. Dependent target is defined in self.targets_as_params

Source code in albumentations/augmentations/transforms.py
Python
def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
    """Resolve the equalization mask from the configured `mask` attribute.

    Args:
        params: target values; passed as keyword arguments to the mask when it is callable.

    Returns:
        Dict with the single key "mask": the callable's result, or the stored mask as-is.

    """
    resolved_mask = self.mask(**params) if callable(self.mask) else self.mask
    return {"mask": resolved_mask}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, ...]:
    """Return the names of the constructor arguments that describe this transform."""
    return ("mode", "by_channels", "mask", "mask_params")

class FancyPCA (alpha=0.1, always_apply=False, p=0.5) [view source on GitHub]

Augment RGB image using FancyPCA from Krizhevsky's paper "ImageNet Classification with Deep Convolutional Neural Networks"

Parameters:

Name Type Description
alpha float

how much to perturb/scale the eigenvectors and eigenvalues. The scale is sampled from a Gaussian distribution (mu=0, sigma=alpha).

Targets

image

Image types: 3-channel uint8 images only

Source code in albumentations/augmentations/transforms.py
Python
class FancyPCA(ImageOnlyTransform):
    """Augment an RGB image with FancyPCA, as described in Krizhevsky's paper
    "ImageNet Classification with Deep Convolutional Neural Networks".

    Args:
        alpha: how strongly the eigenvectors and eigenvalues are perturbed/scaled.
            The scale is sampled from a Gaussian distribution (mu=0, sigma=alpha).

    Targets:
        image

    Image types:
        3-channel uint8 images only

    Credit:
        http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf
        https://deshanadesai.github.io/notes/Fancy-PCA-with-Scikit-Image
        https://pixelatedbrian.github.io/2018-04-29-fancy_pca/

    """

    class InitSchema(BaseTransformInitSchema):
        alpha: float = Field(default=0.1, description="Scale for perturbing the eigen vectors and values", ge=0)

    def __init__(self, alpha: float = 0.1, always_apply: bool = False, p: float = 0.5):
        super().__init__(always_apply=always_apply, p=p)
        self.alpha = alpha

    def apply(self, img: np.ndarray, alpha: float = 0.1, **params: Any) -> np.ndarray:
        """Run `F.fancy_pca` on the image with the sampled perturbation scale."""
        return F.fancy_pca(img, alpha)

    def get_params(self) -> Dict[str, float]:
        """Sample the perturbation scale from a zero-mean Gaussian with sigma `self.alpha`."""
        perturbation_scale = random.gauss(0, self.alpha)
        return {"alpha": perturbation_scale}

    def get_transform_init_args_names(self) -> Tuple[str]:
        """Return the names of the constructor arguments that describe this transform."""
        return ("alpha",)

apply (self, img, alpha=0.1, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, alpha: float = 0.1, **params: Any) -> np.ndarray:
    """Run `F.fancy_pca` on the image.

    Args:
        img: input image.
        alpha: perturbation scale forwarded to `F.fancy_pca`.
        **params: extra parameters, not used here.

    """
    return F.fancy_pca(img, alpha)

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, float]:
    """Sample the perturbation scale from a zero-mean Gaussian with sigma `self.alpha`.

    Returns:
        Dict with the single key "alpha" holding the sampled scale.

    """
    perturbation_scale = random.gauss(0, self.alpha)
    return {"alpha": perturbation_scale}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str]:
    """Return the names of the constructor arguments that describe this transform."""
    return ("alpha",)

class FromFloat (dtype='uint16', max_value=None, always_apply=False, p=1.0) [view source on GitHub]

Take an input array where all values should lie in the range [0, 1.0], multiply them by max_value and then cast the resulted value to a type specified by dtype. If max_value is None the transform will try to infer the maximum value for the data type from the dtype argument.

This is the inverse transform for `albumentations.augmentations.transforms.ToFloat`.

Parameters:

Name Type Description
max_value Optional[float]

maximum possible input value. Default: None.

dtype typing_extensions.Literal['uint8', 'uint16', 'float32', 'float64']

data type of the output. See the 'Data types' page from the NumPy docs (linked below). Default: 'uint16'.

p float

probability of applying the transform. Default: 1.0.

Targets

image

Image types: float32

'Data types' page from the NumPy docs: https://numpy.org/doc/stable/user/basics.types.html

Source code in albumentations/augmentations/transforms.py
Python
class FromFloat(ImageOnlyTransform):
    """Take an input array where all values should lie in the range [0, 1.0], multiply them by `max_value` and then
    cast the resulted value to a type specified by `dtype`. If `max_value` is None the transform will try to infer
    the maximum value for the data type from the `dtype` argument.

    This is the inverse transform for :class:`~albumentations.augmentations.transforms.ToFloat`.

    Args:
        max_value: maximum possible input value. Default: None.
        dtype: data type of the output. See the `'Data types' page from the NumPy docs`_.
            Default: 'uint16'.
        p: probability of applying the transform. Default: 1.0.

    Targets:
        image

    Image types:
        float32

    .. _'Data types' page from the NumPy docs:
       https://numpy.org/doc/stable/user/basics.types.html

    """

    class InitSchema(BaseTransformInitSchema):
        # NOTE(review): `dtype` has no default here although __init__ defaults it
        # to "uint16" — confirm the schema always receives the __init__ default.
        dtype: Literal["uint8", "uint16", "float32", "float64"]
        max_value: Optional[float] = Field(default=None, description="Maximum possible input value.")
        # Unlike most transforms on this page, the default probability is 1.
        p: ProbabilityType = 1

    def __init__(
        self,
        dtype: Literal["uint8", "uint16", "float32", "float64"] = "uint16",
        max_value: Optional[float] = None,
        always_apply: bool = False,
        p: float = 1.0,
    ):
        """Store the target dtype (as a `np.dtype`) and the optional scaling maximum."""
        super().__init__(always_apply=always_apply, p=p)
        # Keep a np.dtype object rather than the string name.
        self.dtype = np.dtype(dtype)
        self.max_value = max_value

    def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
        """Convert `img` via `F.from_float` using the stored dtype and max_value."""
        return F.from_float(img, self.dtype, self.max_value)

    def get_transform_init_args(self) -> Dict[str, Any]:
        """Return the constructor arguments, with the dtype exported as its string name."""
        # self.dtype is a np.dtype, so export `.name` to give back the string form accepted by __init__.
        return {"dtype": self.dtype.name, "max_value": self.max_value}

apply (self, img, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
    """Convert `img` via `F.from_float` using the instance's dtype and max_value.

    Args:
        img: input image.
        **params: extra parameters, not used here.

    """
    return F.from_float(img, self.dtype, self.max_value)

class GaussNoise (var_limit=(10.0, 50.0), mean=0, per_channel=True, always_apply=False, p=0.5) [view source on GitHub]

Apply gaussian noise to the input image.

Parameters:

Name Type Description
var_limit Union[float, Tuple[float, float]]

variance range for noise. If var_limit is a single float, the range will be (0, var_limit). Default: (10.0, 50.0).

mean float

mean of the noise. Default: 0

per_channel bool

if set to True, noise will be sampled for each channel independently. Otherwise, the noise will be sampled once for all channels. Default: True

p float

probability of applying the transform. Default: 0.5.

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class GaussNoise(ImageOnlyTransform):
    """Apply gaussian noise to the input image.

    Args:
        var_limit: variance range for noise. If var_limit is a single float, the range
            will be (0, var_limit). Default: (10.0, 50.0).
        mean: mean of the noise. Default: 0
        per_channel: if set to True, noise will be sampled for each channel independently.
            Otherwise, the noise will be sampled once for all channels. Default: True
        p: probability of applying the transform. Default: 0.5.

    Targets:
        image

    Image types:
        uint8, float32

    """

    class InitSchema(BaseTransformInitSchema):
        var_limit: NonNegativeFloatRangeType = Field(default=(10.0, 50.0), description="Variance range for noise.")
        mean: float = Field(default=0, description="Mean of the noise.")
        per_channel: bool = Field(default=True, description="Apply noise per channel.")

    def __init__(
        self,
        var_limit: ScaleFloatType = (10.0, 50.0),
        mean: float = 0,
        per_channel: bool = True,
        always_apply: bool = False,
        p: float = 0.5,
    ):
        """Store the noise configuration.

        `var_limit` is stored as given and only re-typed with `cast` for the type checker.
        """
        super().__init__(always_apply=always_apply, p=p)
        # NOTE(review): cast() does no runtime conversion; presumably the schema
        # has already turned a scalar var_limit into a (low, high) tuple — confirm.
        self.var_limit = cast(Tuple[float, float], var_limit)
        self.mean = mean
        self.per_channel = per_channel

    def apply(self, img: np.ndarray, gauss: Optional[np.ndarray] = None, **params: Any) -> np.ndarray:
        """Add the pre-sampled noise array `gauss` to `img` via `F.gauss_noise`."""
        return F.gauss_noise(img, gauss=gauss)

    def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, np.ndarray]:
        """Sample a noise array that matches the shape of the input image.

        Args:
            params: target values; only `params["image"]` is read.

        Returns:
            Dict with the single key "gauss" holding the sampled noise.

        """
        image = params["image"]
        # Draw a variance, then convert it to the standard deviation used for sampling.
        var = random.uniform(self.var_limit[0], self.var_limit[1])
        sigma = var**0.5

        if self.per_channel:
            # Independent noise for every element of the image.
            gauss = random_utils.normal(self.mean, sigma, image.shape)
        else:
            # One noise value per spatial position, using only the first two dimensions.
            gauss = random_utils.normal(self.mean, sigma, image.shape[:2])
            if len(image.shape) > GRAYSCALE_SHAPE_LEN:
                # Add a trailing axis of size 1 so the noise can broadcast over channels.
                gauss = np.expand_dims(gauss, -1)

        return {"gauss": gauss}

    @property
    def targets_as_params(self) -> List[str]:
        """Targets handed to `get_params_dependent_on_targets`."""
        return ["image"]

    def get_transform_init_args_names(self) -> Tuple[str, str, str]:
        """Return the names of the constructor arguments that describe this transform."""
        return ("var_limit", "per_channel", "mean")

targets_as_params: List[str] property readonly

Targets used to get params

apply (self, img, gauss=None, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, gauss: Optional[np.ndarray] = None, **params: Any) -> np.ndarray:
    """Add the pre-sampled noise to `img` via `F.gauss_noise`.

    Args:
        img: input image.
        gauss: noise forwarded to `F.gauss_noise`.
        **params: extra parameters, not used here.

    """
    return F.gauss_noise(img, gauss=gauss)

get_params_dependent_on_targets (self, params)

Returns parameters dependent on targets. Dependent target is defined in self.targets_as_params

Source code in albumentations/augmentations/transforms.py
Python
def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, np.ndarray]:
    """Sample a noise array that matches the shape of the input image.

    Args:
        params: target values; only `params["image"]` is read.

    Returns:
        Dict with the single key "gauss" holding the sampled noise.

    """
    image = params["image"]
    # Draw a variance, then convert it to the standard deviation used for sampling.
    var = random.uniform(self.var_limit[0], self.var_limit[1])
    sigma = var**0.5

    if self.per_channel:
        # Independent noise for every element of the image.
        gauss = random_utils.normal(self.mean, sigma, image.shape)
    else:
        # One noise value per spatial position, using only the first two dimensions.
        gauss = random_utils.normal(self.mean, sigma, image.shape[:2])
        if len(image.shape) > GRAYSCALE_SHAPE_LEN:
            # Add a trailing axis of size 1 so the noise can broadcast over channels.
            gauss = np.expand_dims(gauss, -1)

    return {"gauss": gauss}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str, str]:
    """Return the names of the constructor arguments that describe this transform."""
    return ("var_limit", "per_channel", "mean")

class HueSaturationValue (hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, always_apply=False, p=0.5) [view source on GitHub]

Randomly change hue, saturation and value of the input image.

Parameters:

Name Type Description
hue_shift_limit Union[int, Tuple[int, int]]

range for changing hue. If hue_shift_limit is a single int, the range will be (-hue_shift_limit, hue_shift_limit). Default: (-20, 20).

sat_shift_limit Union[int, Tuple[int, int]]

range for changing saturation. If sat_shift_limit is a single int, the range will be (-sat_shift_limit, sat_shift_limit). Default: (-30, 30).

val_shift_limit Union[int, Tuple[int, int]]

range for changing value. If val_shift_limit is a single int, the range will be (-val_shift_limit, val_shift_limit). Default: (-20, 20).

p float

probability of applying the transform. Default: 0.5.

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class HueSaturationValue(ImageOnlyTransform):
    """Randomly shift the hue, saturation and value of the input image.

    Args:
        hue_shift_limit: range for the hue shift. A single int is interpreted as
            (-hue_shift_limit, hue_shift_limit). Default: (-20, 20).
        sat_shift_limit: range for the saturation shift. A single int is interpreted as
            (-sat_shift_limit, sat_shift_limit). Default: (-30, 30).
        val_shift_limit: range for the value shift. A single int is interpreted as
            (-val_shift_limit, val_shift_limit). Default: (-20, 20).
        p (float): probability of applying the transform. Default: 0.5.

    Targets:
        image

    Image types:
        uint8, float32

    """

    class InitSchema(BaseTransformInitSchema):
        hue_shift_limit: SymmetricRangeType = (-20, 20)
        sat_shift_limit: SymmetricRangeType = (-30, 30)
        val_shift_limit: SymmetricRangeType = (-20, 20)

    def __init__(
        self,
        hue_shift_limit: ScaleIntType = 20,
        sat_shift_limit: ScaleIntType = 30,
        val_shift_limit: ScaleIntType = 20,
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        # cast() only informs the type checker; the values are stored as given.
        self.val_shift_limit = cast(Tuple[float, float], val_shift_limit)
        self.sat_shift_limit = cast(Tuple[float, float], sat_shift_limit)
        self.hue_shift_limit = cast(Tuple[float, float], hue_shift_limit)

    def apply(
        self,
        img: np.ndarray,
        hue_shift: int = 0,
        sat_shift: int = 0,
        val_shift: int = 0,
        **params: Any,
    ) -> np.ndarray:
        """Shift the HSV components of `img`; raise TypeError for unsupported channel counts."""
        if is_rgb_image(img) or is_grayscale_image(img):
            return F.shift_hsv(img, hue_shift, sat_shift, val_shift)

        msg = "HueSaturationValue transformation expects 1-channel or 3-channel images."
        raise TypeError(msg)

    def get_params(self) -> Dict[str, float]:
        """Sample one shift per component, uniformly from its configured range."""
        hue = random.uniform(self.hue_shift_limit[0], self.hue_shift_limit[1])
        sat = random.uniform(self.sat_shift_limit[0], self.sat_shift_limit[1])
        val = random.uniform(self.val_shift_limit[0], self.val_shift_limit[1])
        return {"hue_shift": hue, "sat_shift": sat, "val_shift": val}

    def get_transform_init_args_names(self) -> Tuple[str, str, str]:
        """Return the names of the constructor arguments that describe this transform."""
        return ("hue_shift_limit", "sat_shift_limit", "val_shift_limit")

apply (self, img, hue_shift=0, sat_shift=0, val_shift=0, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(
    self,
    img: np.ndarray,
    hue_shift: int = 0,
    sat_shift: int = 0,
    val_shift: int = 0,
    **params: Any,
) -> np.ndarray:
    """Shift the HSV components of `img` via `F.shift_hsv`.

    Raises:
        TypeError: if `img` is neither an RGB nor a grayscale image.

    """
    if is_rgb_image(img) or is_grayscale_image(img):
        return F.shift_hsv(img, hue_shift, sat_shift, val_shift)

    msg = "HueSaturationValue transformation expects 1-channel or 3-channel images."
    raise TypeError(msg)

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, float]:
    """Sample one shift per HSV component, uniformly from its configured range.

    Returns:
        Dict with the keys "hue_shift", "sat_shift" and "val_shift".

    """
    hue = random.uniform(self.hue_shift_limit[0], self.hue_shift_limit[1])
    sat = random.uniform(self.sat_shift_limit[0], self.sat_shift_limit[1])
    val = random.uniform(self.val_shift_limit[0], self.val_shift_limit[1])
    return {"hue_shift": hue, "sat_shift": sat, "val_shift": val}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str, str]:
    """Return the names of the constructor arguments that describe this transform."""
    return ("hue_shift_limit", "sat_shift_limit", "val_shift_limit")

class ISONoise (color_shift=(0.01, 0.05), intensity=(0.1, 0.5), always_apply=False, p=0.5) [view source on GitHub]

Apply camera sensor noise.

Parameters:

Name Type Description
color_shift float, float

variance range for color hue change. Measured as a fraction of 360 degree Hue angle in HLS colorspace.

intensity float, float

Multiplicative factor that controls the strength of color and luminance noise.

p float

probability of applying the transform. Default: 0.5.

Targets

image

Image types: uint8

Source code in albumentations/augmentations/transforms.py
Python
class ISONoise(ImageOnlyTransform):
    """Apply camera sensor noise.

    Args:
        color_shift (float, float): variance range for the color hue change.
            Measured as a fraction of the 360 degree Hue angle in HLS colorspace.
        intensity (float, float): multiplicative factor that controls the strength
            of color and luminance noise.
        p (float): probability of applying the transform. Default: 0.5.

    Targets:
        image

    Image types:
        uint8

    """

    class InitSchema(BaseTransformInitSchema):
        color_shift: Tuple[float, float] = Field(
            default=(0.01, 0.05),
            description=(
                "Variance range for color hue change. Measured as a fraction of 360 degree Hue angle in HLS colorspace."
            ),
        )
        intensity: Tuple[float, float] = Field(
            default=(0.1, 0.5),
            description="Multiplicative factor that control strength of color and luminance noise.",
        )

    def __init__(
        self,
        color_shift: Tuple[float, float] = (0.01, 0.05),
        intensity: Tuple[float, float] = (0.1, 0.5),
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        self.color_shift = color_shift
        self.intensity = intensity

    def apply(
        self,
        img: np.ndarray,
        color_shift: float = 0.05,
        intensity: float = 1.0,
        random_state: Optional[int] = None,
        **params: Any,
    ) -> np.ndarray:
        """Apply ISO noise to `img`, drawing the noise from a generator seeded with `random_state`."""
        rng = np.random.RandomState(random_state)
        return F.iso_noise(img, color_shift, intensity, rng)

    def get_params(self) -> Dict[str, Any]:
        """Sample the color shift, the intensity and a seed for the noise generator."""
        sampled_color_shift = random_utils.uniform(self.color_shift[0], self.color_shift[1])
        sampled_intensity = random_utils.uniform(self.intensity[0], self.intensity[1])
        seed = random_utils.randint(0, 65536)
        return {
            "color_shift": sampled_color_shift,
            "intensity": sampled_intensity,
            "random_state": seed,
        }

    def get_transform_init_args_names(self) -> Tuple[str, str]:
        """Return the names of the constructor arguments that describe this transform."""
        return ("intensity", "color_shift")

apply (self, img, color_shift=0.05, intensity=1.0, random_state=None, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(
    self,
    img: np.ndarray,
    color_shift: float = 0.05,
    intensity: float = 1.0,
    random_state: Optional[int] = None,
    **params: Any,
) -> np.ndarray:
    """Apply ISO noise to `img` via `F.iso_noise`.

    Args:
        img: input image.
        color_shift: color shift value forwarded to `F.iso_noise`.
        intensity: intensity value forwarded to `F.iso_noise`.
        random_state: seed for the `np.random.RandomState` handed to `F.iso_noise`.
        **params: extra parameters, not used here.

    """
    # A fresh RandomState is built per call from the given seed.
    return F.iso_noise(img, color_shift, intensity, np.random.RandomState(random_state))

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, Any]:
    """Sample the color shift, the intensity and a seed for the noise generator.

    Returns:
        Dict with the keys "color_shift", "intensity" and "random_state".

    """
    sampled_color_shift = random_utils.uniform(self.color_shift[0], self.color_shift[1])
    sampled_intensity = random_utils.uniform(self.intensity[0], self.intensity[1])
    seed = random_utils.randint(0, 65536)
    return {
        "color_shift": sampled_color_shift,
        "intensity": sampled_intensity,
        "random_state": seed,
    }

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str]:
    """Return the names of the constructor arguments that describe this transform."""
    return ("intensity", "color_shift")

class ImageCompression (quality_lower=99, quality_upper=100, compression_type=<ImageCompressionType.JPEG: 0>, always_apply=False, p=0.5) [view source on GitHub]

Decreases image quality by applying JPEG or WebP compression to the image.

Parameters:

Name Type Description
quality_lower int

lower bound on the image quality. Should be in [0, 100] range for jpeg and [1, 100] for webp.

quality_upper int

upper bound on the image quality. Should be in [0, 100] range for jpeg and [1, 100] for webp.

compression_type ImageCompressionType

should be ImageCompressionType.JPEG or ImageCompressionType.WEBP. Default: ImageCompressionType.JPEG

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class ImageCompression(ImageOnlyTransform):
    """Decrease image quality by applying JPEG or WebP compression to the image.

    Args:
        quality_lower: lower bound on the image quality. The init schema accepts values in [1, 100].
        quality_upper: upper bound on the image quality. The init schema accepts values in [1, 100].
            May equal `quality_lower` to compress with one fixed quality.
        compression_type (ImageCompressionType): should be ImageCompressionType.JPEG or ImageCompressionType.WEBP.
            Default: ImageCompressionType.JPEG

    Targets:
        image

    Image types:
        uint8, float32

    """

    class InitSchema(BaseTransformInitSchema):
        quality_lower: int = Field(default=99, description="Lower bound on the image quality", ge=1, le=100)
        quality_upper: int = Field(default=100, description="Upper bound on the image quality", ge=1, le=100)
        compression_type: ImageCompressionType = Field(
            default=ImageCompressionType.JPEG,
            description="Image compression format",
        )

        @model_validator(mode="after")
        def validate_quality(self) -> Self:
            """Reject an inverted quality range; equal bounds are allowed."""
            # get_params samples with randint(quality_lower, quality_upper + 1),
            # so lower == upper is a valid way to pin a single quality value.
            if self.quality_lower > self.quality_upper:
                msg = "quality_lower must be less than or equal to quality_upper"
                raise ValueError(msg)
            return self

    def __init__(
        self,
        quality_lower: int = 99,
        quality_upper: int = 100,
        compression_type: ImageCompressionType = ImageCompressionType.JPEG,
        always_apply: bool = False,
        p: float = 0.5,
    ):
        # Keyword form, matching how the sibling transforms call the parent constructor.
        super().__init__(always_apply=always_apply, p=p)

        self.quality_lower = quality_lower
        self.quality_upper = quality_upper
        self.compression_type = compression_type

    def apply(self, img: np.ndarray, quality: int = 100, image_type: str = ".jpg", **params: Any) -> np.ndarray:
        """Compress `img` with the sampled quality and file extension.

        Raises:
            TypeError: if `img` has a channel axis whose size is not 1, 3 or 4.

        """
        # Arrays with GRAYSCALE_SHAPE_LEN dimensions have no channel axis to check.
        if img.ndim != GRAYSCALE_SHAPE_LEN and img.shape[-1] not in (1, 3, 4):
            msg = "ImageCompression transformation expects 1, 3 or 4 channel images."
            raise TypeError(msg)
        return F.image_compression(img, quality, image_type)

    def get_params(self) -> Dict[str, Any]:
        """Sample a quality from the configured range and pick the file extension."""
        image_type = ".webp" if self.compression_type == ImageCompressionType.WEBP else ".jpg"

        return {
            # "+ 1" makes quality_upper itself reachable
            # (presumably randint's upper bound is exclusive — confirm in random_utils).
            "quality": random_utils.randint(self.quality_lower, self.quality_upper + 1),
            "image_type": image_type,
        }

    def get_transform_init_args(self) -> Dict[str, Any]:
        """Return the constructor arguments, with the compression type exported as its enum value."""
        return {
            "quality_lower": self.quality_lower,
            "quality_upper": self.quality_upper,
            "compression_type": self.compression_type.value,
        }

apply (self, img, quality=100, image_type='.jpg', **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, quality: int = 100, image_type: str = ".jpg", **params: Any) -> np.ndarray:
    """Compress `img` via `F.image_compression` with the given quality and file extension.

    Raises:
        TypeError: if `img` has a channel axis whose size is not 1, 3 or 4.

    """
    # Arrays with GRAYSCALE_SHAPE_LEN dimensions have no channel axis to check.
    channels_supported = img.ndim == GRAYSCALE_SHAPE_LEN or img.shape[-1] in (1, 3, 4)
    if not channels_supported:
        msg = "ImageCompression transformation expects 1, 3 or 4 channel images."
        raise TypeError(msg)
    return F.image_compression(img, quality, image_type)

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, Any]:
    """Sample a quality from the configured range and pick the file extension.

    Returns:
        Dict with the keys "quality" and "image_type".

    """
    is_webp = self.compression_type == ImageCompressionType.WEBP
    extension = ".webp" if is_webp else ".jpg"
    sampled_quality = random_utils.randint(self.quality_lower, self.quality_upper + 1)
    return {"quality": sampled_quality, "image_type": extension}

class InvertImg [view source on GitHub]

Invert the input image by subtracting pixel values from max values of the image types, i.e., 255 for uint8 and 1.0 for float32.

Parameters:

Name Type Description
p

probability of applying the transform. Default: 0.5.

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class InvertImg(ImageOnlyTransform):
    """Invert the input image by subtracting pixel values from max values of the image types,
    i.e., 255 for uint8 and 1.0 for float32.

    Args:
        p: probability of applying the transform. Default: 0.5.

    Targets:
        image

    Image types:
        uint8, float32

    """

    def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
        """Return the result of `F.invert` for the image; extra params are ignored."""
        return F.invert(img)

    def get_transform_init_args_names(self) -> Tuple[()]:
        """Return an empty tuple: no transform-specific constructor arguments are reported."""
        return ()

apply (self, img, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
    """Return the result of `F.invert` for the image; extra params are ignored."""
    return F.invert(img)

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[()]:
    """Return an empty tuple: no transform-specific constructor arguments are reported."""
    return ()

class Lambda (image=None, mask=None, keypoint=None, bbox=None, global_label=None, name=None, always_apply=False, p=1.0) [view source on GitHub]

A flexible transformation class for applying user-defined transformation functions to individual targets. Each function's signature must include **kwargs so that it can accept optional arguments such as the interpolation method, image size, etc.

Parameters:

Name Type Description
image Optional[Callable[..., Any]]

Image transformation function.

mask Optional[Callable[..., Any]]

Mask transformation function.

keypoint Optional[Callable[..., Any]]

Keypoint transformation function.

bbox Optional[Callable[..., Any]]

BBox transformation function.

global_label Optional[Callable[..., Any]]

Global label transformation function.

always_apply bool

Indicates whether this transformation should be always applied.

p float

probability of applying the transform. Default: 1.0.

Targets

image, mask, bboxes, keypoints, global_label

Image types: Any

Source code in albumentations/augmentations/transforms.py
Python
class Lambda(NoOp):
    """Transform that delegates each target to a user-supplied function.

    Every function must accept **kwargs so that it can receive optional arguments
    such as the interpolation method, image size, etc.

    Args:
        image: Image transformation function.
        mask: Mask transformation function.
        keypoint: Keypoint transformation function.
        bbox: BBox transformation function.
        global_label: Global label transformation function.
        name: Optional name of the transform; `to_dict_private` requires it.
        always_apply: Indicates whether this transformation should be always applied.
        p: probability of applying the transform. Default: 1.0.

    Targets:
        image, mask, bboxes, keypoints, global_label

    Image types:
        Any

    """

    def __init__(
        self,
        image: Optional[Callable[..., Any]] = None,
        mask: Optional[Callable[..., Any]] = None,
        keypoint: Optional[Callable[..., Any]] = None,
        bbox: Optional[Callable[..., Any]] = None,
        global_label: Optional[Callable[..., Any]] = None,
        name: Optional[str] = None,
        always_apply: bool = False,
        p: float = 1.0,
    ):
        super().__init__(always_apply, p)

        self.name = name
        user_fns = {
            "image": image,
            "mask": mask,
            "keypoint": keypoint,
            "bbox": bbox,
            "global_label": global_label,
        }
        # Every target starts as a no-op; user-supplied functions replace it below.
        self.custom_apply_fns = dict.fromkeys(user_fns, F.noop)
        for target_name, user_fn in user_fns.items():
            if user_fn is None:
                continue
            if isinstance(user_fn, LambdaType) and user_fn.__name__ == "<lambda>":
                warnings.warn(
                    "Using lambda is incompatible with multiprocessing. "
                    "Consider using regular functions or partial().",
                )
            self.custom_apply_fns[target_name] = user_fn

    def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
        """Run the image function on `img`."""
        return self.custom_apply_fns["image"](img, **params)

    def apply_to_mask(self, mask: np.ndarray, **params: Any) -> np.ndarray:
        """Run the mask function on `mask`."""
        return self.custom_apply_fns["mask"](mask, **params)

    def apply_to_bbox(self, bbox: BoxInternalType, **params: Any) -> BoxInternalType:
        """Run the bbox function on `bbox`."""
        return self.custom_apply_fns["bbox"](bbox, **params)

    def apply_to_keypoint(self, keypoint: KeypointInternalType, **params: Any) -> KeypointInternalType:
        """Run the keypoint function on `keypoint`."""
        return self.custom_apply_fns["keypoint"](keypoint, **params)

    def apply_to_global_label(self, label: np.ndarray, **params: Any) -> np.ndarray:
        """Run the global-label function on `label`."""
        return self.custom_apply_fns["global_label"](label, **params)

    @classmethod
    def is_serializable(cls) -> bool:
        """Report that this transform is not serializable."""
        return False

    def to_dict_private(self) -> Dict[str, Any]:
        """Return the class full name and the transform name; raise ValueError without a name."""
        if self.name is not None:
            return {"__class_fullname__": self.get_class_fullname(), "__name__": self.name}

        msg = (
            "To make a Lambda transform serializable you should provide the `name` argument, "
            "e.g. `Lambda(name='my_transform', image=<some func>, ...)`."
        )
        raise ValueError(msg)

    def __repr__(self) -> str:
        """Render the name, the per-target functions and the base init args."""
        state: Dict[str, Any] = {"name": self.name, **self.custom_apply_fns}
        state.update(self.get_base_init_args())
        return f"{self.__class__.__name__}({format_args(state)})"

__init__ (self, image=None, mask=None, keypoint=None, bbox=None, global_label=None, name=None, always_apply=False, p=1.0) special

Initialize self. See help(type(self)) for accurate signature.

Source code in albumentations/augmentations/transforms.py
Python
def __init__(
    self,
    image: Optional[Callable[..., Any]] = None,
    mask: Optional[Callable[..., Any]] = None,
    keypoint: Optional[Callable[..., Any]] = None,
    bbox: Optional[Callable[..., Any]] = None,
    global_label: Optional[Callable[..., Any]] = None,
    name: Optional[str] = None,
    always_apply: bool = False,
    p: float = 1.0,
):
    """Register one optional callable per target.

    Args:
        image: Callable used for the ``"image"`` target, or ``None`` to keep the no-op.
        mask: Callable used for the ``"mask"`` target, or ``None`` to keep the no-op.
        keypoint: Callable used for the ``"keypoint"`` target, or ``None`` to keep the no-op.
        bbox: Callable used for the ``"bbox"`` target, or ``None`` to keep the no-op.
        global_label: Callable used for the ``"global_label"`` target, or ``None`` to keep the no-op.
        name: Optional name stored on the instance as ``self.name``.
        always_apply: Forwarded to the parent constructor.
        p: Forwarded to the parent constructor.
    """
    super().__init__(always_apply, p)

    self.name = name

    supplied = {
        "image": image,
        "mask": mask,
        "keypoint": keypoint,
        "bbox": bbox,
        "global_label": global_label,
    }
    # Every target starts on the no-op; user callables are overlaid below.
    self.custom_apply_fns = dict.fromkeys(supplied, F.noop)

    for target_name, fn in supplied.items():
        if fn is None:
            continue

        if isinstance(fn, LambdaType) and fn.__name__ == "<lambda>":
            warnings.warn(
                "Using lambda is incompatible with multiprocessing. "
                "Consider using regular functions or partial().",
            )

        self.custom_apply_fns[target_name] = fn

apply (self, img, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
    """Run the callable registered for the ``"image"`` target on ``img``, forwarding ``params``."""
    return self.custom_apply_fns["image"](img, **params)

class Morphological (scale=(2, 3), operation='dilation', always_apply=False, p=0.5) [view source on GitHub]

Apply a morphological operation (dilation or erosion) to an image, with particular value for enhancing document scans.

Morphological operations modify the structure of the image. Dilation expands the white (foreground) regions in a binary or grayscale image, while erosion shrinks them. These operations are beneficial in document processing, for example: - Dilation helps in closing up gaps within text or making thin lines thicker, enhancing legibility for OCR (Optical Character Recognition). - Erosion can remove small white noise and detach connected objects, making the structure of larger objects more pronounced.

Parameters:

Name Type Description
scale int or tuple/list of int

Specifies the size of the structuring element (kernel) used for the operation. - If an integer is provided, a square kernel of that size will be used. - If a tuple or list is provided, it should contain two integers representing the minimum and maximum sizes for the dilation kernel.

operation str

The morphological operation to apply. Options are 'dilation' or 'erosion'. Default is 'dilation'.

always_apply bool

Whether to always apply this transformation. Default is False.

p float

The probability of applying this transformation. Default is 0.5.

Targets

image, mask

Image types: uint8, float32

Examples:

Python
>>> import albumentations as A
>>> transform = A.Compose([
>>>     A.Morphological(scale=(2, 3), operation='dilation', p=0.5)
>>> ])
>>> image = transform(image=image)["image"]
Source code in albumentations/augmentations/transforms.py
Python
class Morphological(DualTransform):
    """Apply a morphological operation (dilation or erosion) to an image,
    with particular value for enhancing document scans.

    Morphological operations modify the structure of the image.
    Dilation expands the white (foreground) regions in a binary or grayscale image, while erosion shrinks them.
    These operations are beneficial in document processing, for example:
    - Dilation helps in closing up gaps within text or making thin lines thicker,
        enhancing legibility for OCR (Optical Character Recognition).
    - Erosion can remove small white noise and detach connected objects,
        making the structure of larger objects more pronounced.

    Args:
        scale (int or tuple/list of int): Specifies the size of the structuring element (kernel) used for the operation.
            - If an integer is provided, a square kernel of that size will be used.
            - If a tuple or list is provided, it should contain two integers.
        operation (str, optional): The morphological operation to apply. Options are 'dilation' or 'erosion'.
            Default is 'dilation'.
        always_apply (bool, optional): Whether to always apply this transformation. Default is False.
        p (float, optional): The probability of applying this transformation. Default is 0.5.

    Targets:
        image, mask

    Image types:
        uint8, float32

    Note:
        The stored `scale` is handed unchanged to `cv2.getStructuringElement` as the kernel size, and the
        same elliptical kernel is used for both dilation and erosion. No random size is drawn in this class.
        NOTE(review): an int `scale` is presumably expanded to a pair by `InitSchema` validation - confirm.

    Reference:
        https://github.com/facebookresearch/nougat

    Example:
        >>> import albumentations as A
        >>> transform = A.Compose([
        >>>     A.Morphological(scale=(2, 3), operation='dilation', p=0.5)
        >>> ])
        >>> image = transform(image=image)["image"]
    """

    _targets = (Targets.IMAGE, Targets.MASK)

    class InitSchema(BaseTransformInitSchema):
        scale: OnePlusIntRangeType = (2, 3)
        operation: MorphologyMode = "dilation"

    def __init__(
        self,
        scale: ScaleIntType = (2, 3),
        operation: MorphologyMode = "dilation",
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply, p)
        self.scale = cast(Tuple[int, int], scale)
        self.operation = operation

    def apply(self, img: np.ndarray, kernel: np.ndarray, **params: Any) -> np.ndarray:
        """Apply the configured operation to `img` with the structuring element from `get_params`."""
        return F.morphology(img, kernel, self.operation)

    def apply_to_mask(self, mask: np.ndarray, kernel: np.ndarray, **params: Any) -> np.ndarray:
        """Apply the same operation and structuring element to `mask`."""
        return F.morphology(mask, kernel, self.operation)

    def get_params(self) -> Dict[str, np.ndarray]:
        """Build the elliptical structuring element shared by the image and mask targets."""
        return {
            # `self.scale` is used directly as the kernel size.
            "kernel": cv2.getStructuringElement(cv2.MORPH_ELLIPSE, self.scale),
        }

    def get_transform_init_args_names(self) -> Tuple[str, ...]:
        """Return the constructor argument names reported for this transform."""
        return ("scale", "operation")

apply (self, img, kernel, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, kernel: np.ndarray, **params: Any) -> np.ndarray:
    # Delegates to F.morphology with the supplied structuring element and the configured operation.
    return F.morphology(img, kernel, self.operation)

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, np.ndarray]:
    # Builds an elliptical structuring element; `self.scale` is passed directly as the kernel size.
    return {
        "kernel": cv2.getStructuringElement(cv2.MORPH_ELLIPSE, self.scale),
    }

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, ...]:
    # Constructor argument names reported for this transform.
    return ("scale", "operation")

class MultiplicativeNoise (multiplier=(0.9, 1.1), per_channel=False, elementwise=False, always_apply=False, p=0.5) [view source on GitHub]

Multiply the image by a random number or an array of numbers.

Parameters:

Name Type Description
multiplier Union[float, Tuple[float, float]]

If a single float, the image will be multiplied by this number. If a tuple of floats, the multiplier will be in the range [multiplier[0], multiplier[1]). Default: (0.9, 1.1).

per_channel bool

If False, the same values will be used for all channels. If True, values are sampled separately for each channel. Default: False.

elementwise bool

If False, multiply all pixels in the image by a random value sampled once. If True, multiply image pixels by values that are randomly sampled per pixel. Default: False.

Targets

image

Image types: Any

Source code in albumentations/augmentations/transforms.py
Python
class MultiplicativeNoise(ImageOnlyTransform):
    """Multiply the image by a random number or an array of numbers.

    Args:
        multiplier: If a single float, the image will be multiplied by this number.
            If a tuple of floats, the multiplier will be in range `[multiplier[0], multiplier[1])`.
            Default: (0.9, 1.1).
        per_channel: If `False`, the same values will be used for all channels.
            If `True`, values are sampled separately for each channel. Default False.
        elementwise: If `False`, multiply all pixels in the image by a random value sampled once.
            If `True`, multiply image pixels by values that are randomly sampled per pixel. Default: False.

    Targets:
        image

    Image types:
        Any

    """

    class InitSchema(BaseTransformInitSchema):
        multiplier: NonNegativeFloatRangeType = (0.9, 1.1)
        per_channel: bool = Field(default=False, description="Apply multiplier per channel.")
        elementwise: bool = Field(default=False, description="Apply multiplier element-wise to pixels.")

    def __init__(
        self,
        multiplier: ScaleFloatType = (0.9, 1.1),
        per_channel: bool = False,
        elementwise: bool = False,
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        self.multiplier = cast(Tuple[float, float], multiplier)
        self.per_channel = per_channel
        self.elementwise = elementwise

    def apply(self, img: np.ndarray, multiplier: np.ndarray = np.array([1]), **kwargs: Any) -> np.ndarray:
        """Multiply `img` by `multiplier`.

        `multiplier` is an array: both the default and every value produced by
        `get_params_dependent_on_targets` are `np.ndarray` instances.
        """
        return F.multiply(img, multiplier)

    def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Sample the multiplier array for the current image.

        Args:
            params: Mapping that contains the input image under ``"image"``.

        Returns:
            A dict with a single ``"multiplier"`` entry holding the sampled array.
        """
        low, high = self.multiplier[0], self.multiplier[1]
        if low == high:
            # Degenerate range: nothing to sample, and the image is not inspected.
            return {"multiplier": np.array([low])}

        img = params["image"]
        height, width = img.shape[:2]
        grayscale = is_grayscale_image(img)

        if self.per_channel and not grayscale:
            num_channels = img.shape[-1]
        else:
            num_channels = 1

        shape = (height, width, num_channels) if self.elementwise else (num_channels,)

        multiplier = random_utils.uniform(low, high, shape)
        if grayscale and img.ndim == GRAYSCALE_SHAPE_LEN:
            # Drop the size-1 axes from the sampled array for an image without a channel axis.
            multiplier = np.squeeze(multiplier)

        return {"multiplier": multiplier}

    @property
    def targets_as_params(self) -> List[str]:
        """Targets whose values are passed to `get_params_dependent_on_targets`."""
        return ["image"]

    def get_transform_init_args_names(self) -> Tuple[str, str, str]:
        """Return the constructor argument names reported for this transform."""
        return "multiplier", "per_channel", "elementwise"

targets_as_params: List[str] property readonly

Targets used to get params

apply (self, img, multiplier=array([1]), **kwargs)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, multiplier: np.ndarray = np.array([1]), **kwargs: Any) -> np.ndarray:
    # Delegates to F.multiply; `multiplier` is an array (the default here is np.array([1])).
    return F.multiply(img, multiplier)

get_params_dependent_on_targets (self, params)

Returns parameters dependent on targets. Dependent target is defined in self.targets_as_params

Source code in albumentations/augmentations/transforms.py
Python
def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
    """Sample the multiplier array for the image found under ``params["image"]``.

    Returns:
        A dict with a single ``"multiplier"`` entry holding the sampled array.
    """
    low, high = self.multiplier[0], self.multiplier[1]
    if low == high:
        # Degenerate range: nothing to sample, and the image is not inspected.
        return {"multiplier": np.array([low])}

    img = params["image"]
    height, width = img.shape[:2]
    grayscale = is_grayscale_image(img)

    if self.per_channel and not grayscale:
        num_channels = img.shape[-1]
    else:
        num_channels = 1

    if self.elementwise:
        shape = [height, width, num_channels]
    else:
        shape = [num_channels]

    sampled = random_utils.uniform(low, high, tuple(shape))
    if grayscale and img.ndim == GRAYSCALE_SHAPE_LEN:
        sampled = np.squeeze(sampled)

    return {"multiplier": sampled}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str, str]:
    # Constructor argument names reported for this transform.
    return "multiplier", "per_channel", "elementwise"

class Normalize (mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0, normalization='standard', always_apply=False, p=1.0) [view source on GitHub]

Applies various normalization techniques to an image. The specific normalization technique can be selected with the normalization parameter.

Standard normalization is applied using the formula: img = (img - mean * max_pixel_value) / (std * max_pixel_value). Other normalization techniques adjust the image based on global or per-channel statistics, or scale pixel values to a specified range.

Parameters:

Name Type Description
mean Optional[ColorType]

Mean values for standard normalization. For "standard" normalization, the default values are ImageNet mean values: (0.485, 0.456, 0.406). For "inception" normalization, use mean values of (0.5, 0.5, 0.5).

std Optional[ColorType]

Standard deviation values for standard normalization. For "standard" normalization, the default values are ImageNet standard deviation :(0.229, 0.224, 0.225). For "inception" normalization, use standard deviation values of (0.5, 0.5, 0.5).

max_pixel_value Optional[float]

Maximum possible pixel value, used for scaling in standard normalization. Defaults to 255.0.

normalization Literal["standard", "image", "image_per_channel", "min_max", "min_max_per_channel"]

Specifies the normalization technique to apply. Defaults to "standard". - "standard": Applies the formula (img - mean * max_pixel_value) / (std * max_pixel_value). The default mean and std are based on ImageNet. - "image": Normalizes the whole image based on its global mean and standard deviation. - "image_per_channel": Normalizes the image per channel based on each channel's mean and standard deviation. - "min_max": Scales the image pixel values to a [0, 1] range based on the global minimum and maximum pixel values. - "min_max_per_channel": Scales each channel of the image pixel values to a [0, 1] range based on the per-channel minimum and maximum pixel values. For inception-style normalization, use "standard" with mean and std of (0.5, 0.5, 0.5).

p float

Probability of applying the transform. Defaults to 1.0.

Targets

image

Image types: uint8, float32

Note

For "standard" normalization, mean, std, and max_pixel_value must be provided. For other normalization types, these parameters are ignored.

Source code in albumentations/augmentations/transforms.py
Python
class Normalize(ImageOnlyTransform):
    """Applies various normalization techniques to an image. The specific normalization technique can be selected
        with the `normalization` parameter.

    Standard normalization is applied using the formula:
        `img = (img - mean * max_pixel_value) / (std * max_pixel_value)`.
        Other normalization techniques adjust the image based on global or per-channel statistics,
        or scale pixel values to a specified range.

    Args:
        mean (Optional[ColorType]): Mean values for standard normalization.
            The default values are ImageNet mean values: (0.485, 0.456, 0.406).
            For inception-style normalization, use "standard" with mean values of (0.5, 0.5, 0.5).
        std (Optional[ColorType]): Standard deviation values for standard normalization.
            The default values are ImageNet standard deviation: (0.229, 0.224, 0.225).
            For inception-style normalization, use "standard" with standard deviation values of (0.5, 0.5, 0.5).
        max_pixel_value (Optional[float]): Maximum possible pixel value, used for scaling in standard normalization.
            Defaults to 255.0.
        normalization (Literal["standard", "image", "image_per_channel", "min_max", "min_max_per_channel"]):
            Specifies the normalization technique to apply. Defaults to "standard".
            - "standard": Applies the formula `(img - mean * max_pixel_value) / (std * max_pixel_value)`.
                The default mean and std are based on ImageNet.
            - "image": Normalizes the whole image based on its global mean and standard deviation.
            - "image_per_channel": Normalizes the image per channel based on each channel's mean and standard deviation.
            - "min_max": Scales the image pixel values to a [0, 1] range based on the global
                minimum and maximum pixel values.
            - "min_max_per_channel": Scales each channel of the image pixel values to a [0, 1]
                range based on the per-channel minimum and maximum pixel values.

        p (float): Probability of applying the transform. Defaults to 1.0.

    Targets:
        image

    Image types:
        uint8, float32

    Note:
        For "standard" normalization, `mean`, `std`, and `max_pixel_value` must be provided.
        For other normalization types, these parameters are ignored and may be `None`.
    """

    class InitSchema(BaseTransformInitSchema):
        mean: Optional[ColorType] = Field(
            default=(0.485, 0.456, 0.406),
            description="Mean values for normalization, defaulting to ImageNet mean values.",
        )
        std: Optional[ColorType] = Field(
            default=(0.229, 0.224, 0.225),
            description="Standard deviation values for normalization, defaulting to ImageNet std values.",
        )
        max_pixel_value: Optional[float] = Field(default=255.0, description="Maximum possible pixel value.")
        normalization: Literal[
            "standard",
            "image",
            "image_per_channel",
            "min_max",
            "min_max_per_channel",
        ] = "standard"
        p: ProbabilityType = 1

        @model_validator(mode="after")
        def validate_normalization(self) -> Self:
            # The statistics are only consumed by "standard" normalization, so a missing
            # value is an error for that mode alone. `and` binds tighter than `or`, hence
            # the "any missing" test is computed first and then combined with the mode check.
            stats_missing = self.mean is None or self.std is None or self.max_pixel_value is None
            if self.normalization == "standard" and stats_missing:
                raise ValueError("mean, std, and max_pixel_value must be provided for standard normalization.")
            return self

    def __init__(
        self,
        mean: Optional[ColorType] = (0.485, 0.456, 0.406),
        std: Optional[ColorType] = (0.229, 0.224, 0.225),
        max_pixel_value: Optional[float] = 255.0,
        normalization: Literal["standard", "image", "image_per_channel", "min_max", "min_max_per_channel"] = "standard",
        always_apply: bool = False,
        p: float = 1.0,
    ):
        super().__init__(always_apply=always_apply, p=p)
        self.mean = mean
        self.std = std
        self.max_pixel_value = max_pixel_value
        self.normalization = normalization

    def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
        """Normalize `img` with the configured technique.

        Raises:
            ValueError: If `self.normalization` is not one of the supported names.
        """
        if self.normalization == "standard":
            return F.normalize(
                img,
                cast(ColorType, self.mean),
                cast(ColorType, self.std),
                cast(float, self.max_pixel_value),
            )
        if self.normalization in {"image", "image_per_channel", "min_max", "min_max_per_channel"}:
            return F.normalize_per_image(img, self.normalization)
        raise ValueError(f"Unknown normalization type: {self.normalization}")

    def get_transform_init_args_names(self) -> Tuple[str, ...]:
        """Return the constructor argument names reported for this transform."""
        return ("mean", "std", "max_pixel_value", "normalization")

apply (self, img, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
    """Normalize ``img`` with the configured technique.

    Raises:
        ValueError: If ``self.normalization`` is not one of the supported names.
    """
    mode = self.normalization

    if mode == "standard":
        mean = cast(ColorType, self.mean)
        std = cast(ColorType, self.std)
        max_pixel_value = cast(float, self.max_pixel_value)
        return F.normalize(img, mean, std, max_pixel_value)

    if mode not in {"image", "image_per_channel", "min_max", "min_max_per_channel"}:
        raise ValueError(f"Unknown normalization type: {mode}")

    return F.normalize_per_image(img, mode)

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, ...]:
    # Constructor argument names reported for this transform.
    return ("mean", "std", "max_pixel_value", "normalization")

class PixelDropout (dropout_prob=0.01, per_channel=False, drop_value=0, mask_drop_value=None, always_apply=False, p=0.5) [view source on GitHub]

Set pixels to 0 with some probability.

Parameters:

Name Type Description
dropout_prob float

pixel drop probability. Default: 0.01

per_channel bool

if set to True drop mask will be sampled for each channel, otherwise the same mask will be sampled for all channels. Default: False

drop_value number or sequence of numbers or None

Value that will be set in dropped place. If set to None value will be sampled randomly, default ranges will be used: - uint8 - [0, 255] - uint16 - [0, 65535] - uint32 - [0, 4294967295] - float, double - [0, 1] Default: 0

mask_drop_value number or sequence of numbers or None

Value that will be set in dropped place in masks. If set to None masks will be unchanged. Default: None

p float

probability of applying the transform. Default: 0.5.

Targets

image, mask

Image types: any

Source code in albumentations/augmentations/transforms.py
Python
class PixelDropout(DualTransform):
    """Replace randomly selected pixels with `drop_value` (0 by default).

    Args:
        dropout_prob (float): pixel drop probability. Default: 0.01
        per_channel (bool): if set to `True` drop mask will be sampled for each channel,
            otherwise the same mask will be sampled for all channels. Default: False
        drop_value (number or sequence of numbers or None): Value that will be set in dropped place.
            If set to None value will be sampled randomly, default ranges will be used:
                - uint8 - [0, 255]
                - uint16 - [0, 65535]
                - uint32 - [0, 4294967295]
                - float, double - [0, 1]
            Default: 0
        mask_drop_value (number or sequence of numbers or None): Value that will be set in dropped place in masks.
            If set to None masks will be unchanged. Default: None
        p (float): probability of applying the transform. Default: 0.5.

    Targets:
        image, mask
    Image types:
        any

    Note:
        `mask_drop_value` can only be combined with `per_channel=False`; the init schema rejects
        the other combination with a `ValueError`.

    """

    class InitSchema(BaseTransformInitSchema):
        dropout_prob: ProbabilityType = 0.01
        per_channel: bool = Field(default=False, description="Sample drop mask per channel.")
        drop_value: Optional[ScaleFloatType] = Field(
            default=0,
            description="Value to set in dropped pixels. None for random sampling.",
        )
        mask_drop_value: Optional[ScaleFloatType] = Field(
            default=None,
            description="Value to set in dropped pixels in masks. None to leave masks unchanged.",
        )

        @model_validator(mode="after")
        def validate_mask_drop_value(self) -> Self:
            # Masks are only supported when the drop mask is not sampled per channel.
            if self.mask_drop_value is not None and self.per_channel:
                msg = "PixelDropout supports mask only with per_channel=False."
                raise ValueError(msg)
            return self

    _targets = (Targets.IMAGE, Targets.MASK)

    def __init__(
        self,
        dropout_prob: float = 0.01,
        per_channel: bool = False,
        drop_value: Optional[ScaleFloatType] = 0,
        mask_drop_value: Optional[ScaleFloatType] = None,
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        self.dropout_prob = dropout_prob
        self.per_channel = per_channel
        self.drop_value = drop_value
        self.mask_drop_value = mask_drop_value

    def apply(
        self,
        img: np.ndarray,
        drop_mask: Optional[np.ndarray] = None,
        drop_value: Union[float, Sequence[float]] = (),
        **params: Any,
    ) -> np.ndarray:
        """Apply the dropout to `img` using the mask and value from `get_params_dependent_on_targets`."""
        return F.pixel_dropout(img, drop_mask, drop_value)

    def apply_to_mask(self, mask: np.ndarray, drop_mask: Optional[np.ndarray] = None, **params: Any) -> np.ndarray:
        """Apply the same drop mask to `mask`, writing `self.mask_drop_value` into dropped positions.

        The mask is returned unchanged when `self.mask_drop_value` is `None`.
        """
        if self.mask_drop_value is None:
            return mask

        # The drop mask may carry a trailing singleton axis (added in
        # get_params_dependent_on_targets); remove it for a mask without a channel axis.
        if mask.ndim == GRAYSCALE_SHAPE_LEN:
            drop_mask = np.squeeze(drop_mask)

        return F.pixel_dropout(mask, drop_mask, self.mask_drop_value)

    def apply_to_bbox(self, bbox: BoxInternalType, **params: Any) -> BoxInternalType:
        """Return `bbox` unchanged."""
        return bbox

    def apply_to_keypoint(self, keypoint: KeypointInternalType, **params: Any) -> KeypointInternalType:
        """Return `keypoint` unchanged."""
        return keypoint

    def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Sample the boolean drop mask and resolve the drop value for the current image.

        Args:
            params: Mapping that contains the input image under ``"image"``.

        Returns:
            A dict with ``"drop_mask"`` (boolean array, `True` where a pixel is dropped) and ``"drop_value"``.

        Raises:
            ValueError: If `self.drop_value` is `None` and the image dtype is not one of
                uint8, uint16, uint32, float32 or double.
        """
        img = params["image"]
        # One mask entry per element when sampling per channel, otherwise one per (row, column).
        shape = img.shape if self.per_channel else img.shape[:2]

        # Dedicated NumPy generator seeded from the stdlib `random` module.
        rnd = np.random.RandomState(random.randint(0, 1 << 31))
        # `choice` yields a boolean array directly (True with probability dropout_prob);
        # `binomial` would need an extra type conversion afterwards.
        drop_mask = rnd.choice([True, False], shape, p=[self.dropout_prob, 1 - self.dropout_prob])

        drop_value: Union[float, Sequence[float], np.ndarray]
        if drop_mask.ndim != img.ndim:
            # Add a trailing axis so the mask has as many dimensions as the image.
            drop_mask = np.expand_dims(drop_mask, -1)
        if self.drop_value is None:
            # One random value per channel (a single value for grayscale images).
            drop_shape = 1 if is_grayscale_image(img) else int(img.shape[-1])

            if img.dtype in (np.uint8, np.uint16, np.uint32):
                # NOTE(review): RandomState.randint excludes its upper bound, so the dtype maximum
                # itself looks unreachable here - confirm against F.MAX_VALUES_BY_DTYPE.
                drop_value = rnd.randint(0, int(F.MAX_VALUES_BY_DTYPE[img.dtype]), drop_shape, img.dtype)
            elif img.dtype in [np.float32, np.double]:
                drop_value = rnd.uniform(0, 1, drop_shape).astype(img.dtype)
            else:
                raise ValueError(f"Unsupported dtype: {img.dtype}")
        else:
            drop_value = self.drop_value

        return {"drop_mask": drop_mask, "drop_value": drop_value}

    @property
    def targets_as_params(self) -> List[str]:
        """Targets whose values are passed to `get_params_dependent_on_targets`."""
        return ["image"]

    def get_transform_init_args_names(self) -> Tuple[str, str, str, str]:
        """Return the constructor argument names reported for this transform."""
        return ("dropout_prob", "per_channel", "drop_value", "mask_drop_value")

targets_as_params: List[str] property readonly

Targets used to get params

apply (self, img, drop_mask=None, drop_value=(), **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(
    self,
    img: np.ndarray,
    drop_mask: Optional[np.ndarray] = None,
    drop_value: Union[float, Sequence[float]] = (),
    **params: Any,
) -> np.ndarray:
    # Delegates to F.pixel_dropout with the supplied drop mask and drop value.
    return F.pixel_dropout(img, drop_mask, drop_value)

get_params_dependent_on_targets (self, params)

Returns parameters dependent on targets. Dependent target is defined in self.targets_as_params

Source code in albumentations/augmentations/transforms.py
Python
def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
    """Sample the boolean drop mask and resolve the drop value for the image in ``params["image"]``.

    Returns:
        A dict with ``"drop_mask"`` (boolean array, True where a pixel is dropped) and ``"drop_value"``.

    Raises:
        ValueError: If ``self.drop_value`` is None and the image dtype is not one of
            uint8, uint16, uint32, float32 or double.
    """
    img = params["image"]
    # One mask entry per element when sampling per channel, otherwise one per (row, column).
    shape = img.shape if self.per_channel else img.shape[:2]

    # Dedicated NumPy generator seeded from the stdlib `random` module.
    rnd = np.random.RandomState(random.randint(0, 1 << 31))
    # `choice` yields a boolean array directly (True with probability dropout_prob);
    # `binomial` would need an extra type conversion afterwards.
    drop_mask = rnd.choice([True, False], shape, p=[self.dropout_prob, 1 - self.dropout_prob])

    drop_value: Union[float, Sequence[float], np.ndarray]
    if drop_mask.ndim != img.ndim:
        # Add a trailing axis so the mask has as many dimensions as the image.
        drop_mask = np.expand_dims(drop_mask, -1)
    if self.drop_value is None:
        # One random value per channel (a single value for grayscale images).
        drop_shape = 1 if is_grayscale_image(img) else int(img.shape[-1])

        if img.dtype in (np.uint8, np.uint16, np.uint32):
            # NOTE(review): RandomState.randint excludes its upper bound - confirm the intended range.
            drop_value = rnd.randint(0, int(F.MAX_VALUES_BY_DTYPE[img.dtype]), drop_shape, img.dtype)
        elif img.dtype in [np.float32, np.double]:
            drop_value = rnd.uniform(0, 1, drop_shape).astype(img.dtype)
        else:
            raise ValueError(f"Unsupported dtype: {img.dtype}")
    else:
        drop_value = self.drop_value

    return {"drop_mask": drop_mask, "drop_value": drop_value}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str, str, str]:
    # Constructor argument names reported for this transform.
    return ("dropout_prob", "per_channel", "drop_value", "mask_drop_value")

class Posterize (num_bits=4, always_apply=False, p=0.5) [view source on GitHub]

Reduce the number of bits for each color channel.

Parameters:

Name Type Description
num_bits (int, int) or int, or list of ints [r, g, b], or list of ints [[r1, r2], [g1, g2], [b1, b2]]

number of high bits. If num_bits is a single value, the range will be [num_bits, num_bits]. Must be in range [0, 8]. Default: 4.

p float

probability of applying the transform. Default: 0.5.

Targets: image

Image types: uint8

Source code in albumentations/augmentations/transforms.py
Python
class Posterize(ImageOnlyTransform):
    """Reduce the number of bits for each color channel.

    Args:
        num_bits ((int, int) or int,
                  or list of ints [r, g, b],
                  or list of ints [[r1, r2], [g1, g2], [b1, b2]]): number of high bits.
            If num_bits is a single value, the range will be [num_bits, num_bits].
            Must be in range [0, 8]. Default: 4.
        p: probability of applying the transform. Default: 0.5.

    Targets:
        image

    Image types:
        uint8

    """

    class InitSchema(BaseTransformInitSchema):
        num_bits: Annotated[
            Union[int, Tuple[int, int], Tuple[int, int, int]],
            Field(default=4, description="Number of high bits"),
        ]

        @field_validator("num_bits")
        @classmethod
        def validate_num_bits(cls, num_bits: Any) -> Union[Tuple[int, int], List[Tuple[int, int]]]:
            # A bare int becomes the degenerate range (num_bits, num_bits).
            if isinstance(num_bits, int):
                return cast(Tuple[int, int], to_tuple(num_bits, num_bits))

            is_per_channel = isinstance(num_bits, Sequence) and len(num_bits) == NUM_BITS_ARRAY_LENGTH
            if not is_per_channel:
                return cast(Tuple[int, int], to_tuple(num_bits, 0))

            # One range per entry of the sequence.
            return [cast(Tuple[int, int], to_tuple(entry, 0)) for entry in num_bits]

    def __init__(
        self,
        num_bits: Union[int, Tuple[int, int], Tuple[int, int, int]] = 4,
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        self.num_bits = cast(Union[Tuple[int, ...], List[Tuple[int, ...]]], num_bits)

    def apply(self, img: np.ndarray, num_bits: int = 1, **params: Any) -> np.ndarray:
        """Posterize `img` with the bit count(s) drawn in `get_params`."""
        return F.posterize(img, num_bits)

    def get_params(self) -> Dict[str, Any]:
        """Draw the number of bits: one int, or a list of ints when ranges are stored per entry."""
        bit_spec = self.num_bits
        if len(bit_spec) != NUM_BITS_ARRAY_LENGTH:
            return {"num_bits": random.randint(int(bit_spec[0]), int(bit_spec[1]))}  # type: ignore[arg-type]
        return {"num_bits": [random.randint(int(rng[0]), int(rng[1])) for rng in bit_spec]}  # type: ignore[index]

    def get_transform_init_args_names(self) -> Tuple[str]:
        """Return the constructor argument names reported for this transform."""
        return ("num_bits",)

apply (self, img, num_bits=1, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, num_bits: int = 1, **params: Any) -> np.ndarray:
    # Delegates to F.posterize with the bit count(s) supplied by get_params.
    return F.posterize(img, num_bits)

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, Any]:
    """Draw the number of bits: one int, or a list of ints when ranges are stored per entry."""
    bit_spec = self.num_bits
    if len(bit_spec) != NUM_BITS_ARRAY_LENGTH:
        return {"num_bits": random.randint(int(bit_spec[0]), int(bit_spec[1]))}  # type: ignore[arg-type]
    return {"num_bits": [random.randint(int(rng[0]), int(rng[1])) for rng in bit_spec]}  # type: ignore[index]

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str]:
    # Constructor argument names reported for this transform.
    return ("num_bits",)

class RGBShift (r_shift_limit=(-20, 20), g_shift_limit=(-20, 20), b_shift_limit=(-20, 20), always_apply=False, p=0.5) [view source on GitHub]

Randomly shift values for each channel of the input RGB image.

Parameters:

Name Type Description
r_shift_limit Union[int, Tuple[int, int]]

range for changing values for the red channel. If r_shift_limit is a single int, the range will be (-r_shift_limit, r_shift_limit). Default: (-20, 20).

g_shift_limit Union[int, Tuple[int, int]]

range for changing values for the green channel. If g_shift_limit is a single int, the range will be (-g_shift_limit, g_shift_limit). Default: (-20, 20).

b_shift_limit Union[int, Tuple[int, int]]

range for changing values for the blue channel. If b_shift_limit is a single int, the range will be (-b_shift_limit, b_shift_limit). Default: (-20, 20).

p float

probability of applying the transform. Default: 0.5.

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class RGBShift(ImageOnlyTransform):
    """Shift each channel of the input RGB image by an independently sampled random value.

    Args:
        r_shift_limit: range the red-channel shift is sampled from. If r_shift_limit is a
            single int, the range will be (-r_shift_limit, r_shift_limit). Default: (-20, 20).
        g_shift_limit: range the green-channel shift is sampled from. If g_shift_limit is a
            single int, the range will be (-g_shift_limit, g_shift_limit). Default: (-20, 20).
        b_shift_limit: range the blue-channel shift is sampled from. If b_shift_limit is a
            single int, the range will be (-b_shift_limit, b_shift_limit). Default: (-20, 20).
        p: probability of applying the transform. Default: 0.5.

    Targets:
        image

    Image types:
        uint8, float32

    """

    class InitSchema(BaseTransformInitSchema):
        r_shift_limit: SymmetricRangeType = (-20, 20)
        g_shift_limit: SymmetricRangeType = (-20, 20)
        b_shift_limit: SymmetricRangeType = (-20, 20)

    def __init__(
        self,
        r_shift_limit: ScaleIntType = (-20, 20),
        g_shift_limit: ScaleIntType = (-20, 20),
        b_shift_limit: ScaleIntType = (-20, 20),
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        # cast() is a type-checker hint only; the values are stored exactly as received.
        range_type = Tuple[float, float]
        self.r_shift_limit = cast(range_type, r_shift_limit)
        self.g_shift_limit = cast(range_type, g_shift_limit)
        self.b_shift_limit = cast(range_type, b_shift_limit)

    def apply(self, img: np.ndarray, r_shift: int = 0, g_shift: int = 0, b_shift: int = 0, **params: Any) -> np.ndarray:
        """Apply the sampled per-channel shifts; raises TypeError for non-RGB input."""
        if is_rgb_image(img):
            return F.shift_rgb(img, r_shift, g_shift, b_shift)
        msg = "RGBShift transformation expects 3-channel images."
        raise TypeError(msg)

    def get_params(self) -> Dict[str, Any]:
        """Sample one shift per channel, uniformly from the corresponding limit range."""
        # Order matters for RNG reproducibility: red, then green, then blue.
        channel_limits = (
            ("r_shift", self.r_shift_limit),
            ("g_shift", self.g_shift_limit),
            ("b_shift", self.b_shift_limit),
        )
        return {key: random.uniform(limit[0], limit[1]) for key, limit in channel_limits}

    def get_transform_init_args_names(self) -> Tuple[str, str, str]:
        """Return names of arguments that are used in the init method of the transform."""
        init_arg_names = ("r_shift_limit", "g_shift_limit", "b_shift_limit")
        return init_arg_names

apply (self, img, r_shift=0, g_shift=0, b_shift=0, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, r_shift: int = 0, g_shift: int = 0, b_shift: int = 0, **params: Any) -> np.ndarray:
    """Apply the sampled per-channel shifts; raises TypeError for non-RGB input."""
    if is_rgb_image(img):
        return F.shift_rgb(img, r_shift, g_shift, b_shift)
    msg = "RGBShift transformation expects 3-channel images."
    raise TypeError(msg)

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, Any]:
    """Sample one shift per channel, uniformly from the corresponding limit range."""
    # Order matters for RNG reproducibility: red, then green, then blue.
    channel_limits = (
        ("r_shift", self.r_shift_limit),
        ("g_shift", self.g_shift_limit),
        ("b_shift", self.b_shift_limit),
    )
    return {key: random.uniform(limit[0], limit[1]) for key, limit in channel_limits}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str, str]:
    """Return names of arguments that are used in the init method of the transform."""
    init_arg_names = ("r_shift_limit", "g_shift_limit", "b_shift_limit")
    return init_arg_names

class RandomBrightnessContrast (brightness_limit=(-0.2, 0.2), contrast_limit=(-0.2, 0.2), brightness_by_max=True, always_apply=False, p=0.5) [view source on GitHub]

Randomly change brightness and contrast of the input image.

Parameters:

Name Type Description
brightness_limit Union[float, Tuple[float, float]]

factor range for changing brightness. If limit is a single float, the range will be (-limit, limit). Default: (-0.2, 0.2).

contrast_limit Union[float, Tuple[float, float]]

factor range for changing contrast. If limit is a single float, the range will be (-limit, limit). Default: (-0.2, 0.2).

brightness_by_max bool

If True, adjust brightness by image dtype maximum, else adjust brightness by image mean.

p float

probability of applying the transform. Default: 0.5.

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class RandomBrightnessContrast(ImageOnlyTransform):
    """Randomly change brightness and contrast of the input image.

    Args:
        brightness_limit: factor range for changing brightness.
            If limit is a single float, the range will be (-limit, limit). Default: (-0.2, 0.2).
        contrast_limit: factor range for changing contrast.
            If limit is a single float, the range will be (-limit, limit). Default: (-0.2, 0.2).
        brightness_by_max: If True adjust brightness by image dtype maximum,
            else adjust brightness by image mean.
        p: probability of applying the transform. Default: 0.5.

    Targets:
        image

    Image types:
        uint8, float32

    """

    class InitSchema(BaseTransformInitSchema):
        brightness_limit: SymmetricRangeType = (-0.2, 0.2)
        contrast_limit: SymmetricRangeType = (-0.2, 0.2)
        brightness_by_max: bool = Field(default=True, description="Adjust brightness by image dtype maximum if True.")

    def __init__(
        self,
        brightness_limit: ScaleFloatType = (-0.2, 0.2),
        contrast_limit: ScaleFloatType = (-0.2, 0.2),
        brightness_by_max: bool = True,
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        # cast() is a type-checker hint only; the values are stored exactly as received.
        range_type = Tuple[float, float]
        self.brightness_limit = cast(range_type, brightness_limit)
        self.contrast_limit = cast(range_type, contrast_limit)
        self.brightness_by_max = brightness_by_max

    def apply(self, img: np.ndarray, alpha: float = 1.0, beta: float = 0.0, **params: Any) -> np.ndarray:
        """Apply the sampled contrast factor ``alpha`` and brightness offset ``beta`` to ``img``."""
        by_max = self.brightness_by_max
        return F.brightness_contrast_adjust(img, alpha, beta, by_max)

    def get_params(self) -> Dict[str, float]:
        """Sample ``alpha`` (1 + contrast delta) and ``beta`` (brightness delta)."""
        # Contrast is drawn before brightness so the RNG sequence stays unchanged.
        contrast_delta = random.uniform(self.contrast_limit[0], self.contrast_limit[1])
        brightness_delta = random.uniform(self.brightness_limit[0], self.brightness_limit[1])
        return {"alpha": 1.0 + contrast_delta, "beta": 0.0 + brightness_delta}

    def get_transform_init_args_names(self) -> Tuple[str, str, str]:
        """Return names of arguments that are used in the init method of the transform."""
        init_arg_names = ("brightness_limit", "contrast_limit", "brightness_by_max")
        return init_arg_names

apply (self, img, alpha=1.0, beta=0.0, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, alpha: float = 1.0, beta: float = 0.0, **params: Any) -> np.ndarray:
    """Apply the sampled contrast factor ``alpha`` and brightness offset ``beta`` to ``img``."""
    by_max = self.brightness_by_max
    return F.brightness_contrast_adjust(img, alpha, beta, by_max)

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, float]:
    """Sample ``alpha`` (1 + contrast delta) and ``beta`` (brightness delta)."""
    # Contrast is drawn before brightness so the RNG sequence stays unchanged.
    contrast_delta = random.uniform(self.contrast_limit[0], self.contrast_limit[1])
    brightness_delta = random.uniform(self.brightness_limit[0], self.brightness_limit[1])
    return {"alpha": 1.0 + contrast_delta, "beta": 0.0 + brightness_delta}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str, str]:
    """Return names of arguments that are used in the init method of the transform."""
    init_arg_names = ("brightness_limit", "contrast_limit", "brightness_by_max")
    return init_arg_names

class RandomFog (fog_coef_lower=0.3, fog_coef_upper=1, alpha_coef=0.08, always_apply=False, p=0.5) [view source on GitHub]

Simulates fog for the image

Parameters:

Name Type Description
fog_coef_lower float

lower limit for fog intensity coefficient. Should be in [0, 1] range.

fog_coef_upper float

upper limit for fog intensity coefficient. Should be in [0, 1] range.

alpha_coef float

transparency of the fog circles. Should be in [0, 1] range.

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class RandomFog(ImageOnlyTransform):
    """Simulates fog for the image

    Args:
        fog_coef_lower: lower limit for fog intensity coefficient. Should be in [0, 1] range.
        fog_coef_upper: upper limit for fog intensity coefficient. Should be in [0, 1] range
            and not smaller than fog_coef_lower.
        alpha_coef: transparency of the fog circles. Should be in [0, 1] range.
        p: probability of applying the transform. Default: 0.5.

    Targets:
        image

    Image types:
        uint8, float32

    Reference:
        https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library


    """

    class InitSchema(BaseTransformInitSchema):
        fog_coef_lower: float = Field(default=0.3, description="Lower limit for fog intensity coefficient", ge=0, le=1)
        fog_coef_upper: float = Field(default=1, description="Upper limit for fog intensity coefficient", ge=0, le=1)
        alpha_coef: float = Field(default=0.08, description="Transparency of the fog circles", ge=0, le=1)

        @model_validator(mode="after")
        def validate_fog_coefficients(self) -> Self:
            # The sampling range [lower, upper] must not be inverted.
            if self.fog_coef_lower > self.fog_coef_upper:
                msg = "fog_coef_upper must be greater than or equal to fog_coef_lower."
                raise ValueError(msg)
            return self

    def __init__(
        self,
        fog_coef_lower: float = 0.3,
        fog_coef_upper: float = 1,
        alpha_coef: float = 0.08,
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        self.fog_coef_lower = fog_coef_lower
        self.fog_coef_upper = fog_coef_upper
        self.alpha_coef = alpha_coef

    def apply(
        self,
        img: np.ndarray,
        fog_coef: float = 0.1,
        haze_list: Optional[List[Tuple[int, int]]] = None,
        **params: Any,
    ) -> np.ndarray:
        """Apply transform on image using the sampled fog coefficient and haze points."""
        if haze_list is None:
            haze_list = []
        return F.add_fog(img, fog_coef, self.alpha_coef, haze_list)

    @property
    def targets_as_params(self) -> List[str]:
        """Targets used to get params."""
        return ["image"]

    def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Sample the fog coefficient and the haze points for the given image.

        Args:
            params: mapping that contains the input image under the ``"image"`` key.

        Returns:
            ``{"haze_list": [(x, y), ...], "fog_coef": float}``.
        """
        img = params["image"]
        fog_coef = random.uniform(self.fog_coef_lower, self.fog_coef_upper)

        height, width = imshape = img.shape[:2]
        total = sum(imshape)

        hw = max(1, int(width // 3 * fog_coef))

        haze_list = []
        midx = width // 2 - 2 * hw
        midy = height // 2 - hw
        index = 1

        while midx > -hw or midy > -hw:
            for _ in range(hw // 10 * index):
                x = random_utils.randint(midx, width - midx - hw + 1)
                y = random_utils.randint(midy, height - midy - hw + 1)
                haze_list.append((x, y))

            step_x = 3 * hw * width // total
            step_y = 3 * hw * height // total
            # Integer division yields a zero step on small, strongly elongated images. An axis
            # that still keeps the loop alive must shrink by at least one pixel, otherwise the
            # loop never terminates. Cases that already terminated are left untouched.
            if midx > -hw:
                step_x = max(step_x, 1)
            if midy > -hw:
                step_y = max(step_y, 1)

            midx -= step_x
            midy -= step_y
            index += 1

        return {"haze_list": haze_list, "fog_coef": fog_coef}

    def get_transform_init_args_names(self) -> Tuple[str, str, str]:
        """Return names of arguments that are used in the init method of the transform."""
        return ("fog_coef_lower", "fog_coef_upper", "alpha_coef")

targets_as_params: List[str] property readonly

Targets used to get params

apply (self, img, fog_coef=0.1, haze_list=None, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(
    self,
    img: np.ndarray,
    fog_coef: np.ndarray = 0.1,
    haze_list: Optional[List[Tuple[int, int]]] = None,
    **params: Any,
) -> np.ndarray:
    """Apply transform on image using the given fog coefficient and haze points.

    A missing ``haze_list`` is treated as an empty list.
    """
    haze_points = [] if haze_list is None else haze_list
    return F.add_fog(img, fog_coef, self.alpha_coef, haze_points)

get_params_dependent_on_targets (self, params)

Returns parameters dependent on targets. Dependent target is defined in self.targets_as_params

Source code in albumentations/augmentations/transforms.py
Python
def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
    """Sample the fog coefficient and the haze points for the given image.

    Args:
        params: mapping that contains the input image under the ``"image"`` key.

    Returns:
        ``{"haze_list": [(x, y), ...], "fog_coef": float}``.
    """
    img = params["image"]
    fog_coef = random.uniform(self.fog_coef_lower, self.fog_coef_upper)

    height, width = imshape = img.shape[:2]
    total = sum(imshape)

    hw = max(1, int(width // 3 * fog_coef))

    haze_list = []
    midx = width // 2 - 2 * hw
    midy = height // 2 - hw
    index = 1

    while midx > -hw or midy > -hw:
        for _ in range(hw // 10 * index):
            x = random_utils.randint(midx, width - midx - hw + 1)
            y = random_utils.randint(midy, height - midy - hw + 1)
            haze_list.append((x, y))

        step_x = 3 * hw * width // total
        step_y = 3 * hw * height // total
        # Integer division yields a zero step on small, strongly elongated images. An axis
        # that still keeps the loop alive must shrink by at least one pixel, otherwise the
        # loop never terminates. Cases that already terminated are left untouched.
        if midx > -hw:
            step_x = max(step_x, 1)
        if midy > -hw:
            step_y = max(step_y, 1)

        midx -= step_x
        midy -= step_y
        index += 1

    return {"haze_list": haze_list, "fog_coef": fog_coef}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str, str]:
    """Return names of arguments that are used in the init method of the transform."""
    init_arg_names = ("fog_coef_lower", "fog_coef_upper", "alpha_coef")
    return init_arg_names

class RandomGamma (gamma_limit=(80, 120), always_apply=False, p=0.5) [view source on GitHub]

Applies random gamma correction to an image as a form of data augmentation.

This class adjusts the luminance of an image by applying gamma correction with a randomly selected gamma value from a specified range. Gamma correction can simulate various lighting conditions, potentially enhancing model generalization.

Attributes:

Name Type Description
gamma_limit Union[int, Tuple[int, int]]

The range for gamma adjustment. If gamma_limit is a single int, the range will be interpreted as (-gamma_limit, gamma_limit), defining how much to adjust the image's gamma. Default is (80, 120).

always_apply bool

If True, the transform will always be applied, regardless of p. Default is False.

p float

The probability that the transform will be applied. Default is 0.5.

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class RandomGamma(ImageOnlyTransform):
    """Applies random gamma correction to an image as a form of data augmentation.

    A gamma value is drawn uniformly from ``gamma_limit``, divided by 100 and passed to the
    gamma transform, so the default range (80, 120) yields gamma values between 0.8 and 1.2.
    Gamma correction can simulate various lighting conditions, potentially enhancing model
    generalization.

    Attributes:
        gamma_limit (Union[int, Tuple[int, int]]): The range for gamma adjustment, expressed in
            percent. Default is (80, 120).
            NOTE(review): the expansion of a single number was previously documented as
            (-gamma_limit, gamma_limit); the schema field is a OnePlusFloatRangeType, so this
            looks wrong -- confirm the actual expansion against the validator.
        always_apply (bool): If `True`, the transform will always be applied, regardless of `p`.
            Default is `False`.
        p (float): The probability that the transform will be applied. Default is 0.5.

    Targets:
        image

    Image types:
        uint8, float32

    Reference:
         https://en.wikipedia.org/wiki/Gamma_correction

    """

    class InitSchema(BaseTransformInitSchema):
        gamma_limit: OnePlusFloatRangeType = (80, 120)

    def __init__(
        self,
        gamma_limit: ScaleIntType = (80, 120),
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply, p)
        # cast() is a type-checker hint only; the value is stored exactly as received.
        percent_range = cast(Tuple[float, float], gamma_limit)
        self.gamma_limit = percent_range

    def apply(self, img: np.ndarray, gamma: float = 1, **params: Any) -> np.ndarray:
        """Apply transform on image with the sampled ``gamma``."""
        corrected = F.gamma_transform(img, gamma=gamma)
        return corrected

    def get_params(self) -> Dict[str, float]:
        """Sample a gamma percentage from ``gamma_limit`` and convert it to a factor."""
        lower, upper = self.gamma_limit[0], self.gamma_limit[1]
        gamma_percent = random.uniform(lower, upper)
        return {"gamma": gamma_percent / 100.0}

    def get_transform_init_args_names(self) -> Tuple[str, ...]:
        """Return names of arguments that are used in the init method of the transform."""
        init_arg_names = ("gamma_limit",)
        return init_arg_names

apply (self, img, gamma=1, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, gamma: float = 1, **params: Any) -> np.ndarray:
    """Apply transform on image with the sampled ``gamma``."""
    corrected = F.gamma_transform(img, gamma=gamma)
    return corrected

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, float]:
    """Sample a gamma percentage from ``gamma_limit`` and convert it to a factor."""
    lower, upper = self.gamma_limit[0], self.gamma_limit[1]
    gamma_percent = random.uniform(lower, upper)
    return {"gamma": gamma_percent / 100.0}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, ...]:
    """Return names of arguments that are used in the init method of the transform."""
    init_arg_names = ("gamma_limit",)
    return init_arg_names

class RandomGravel (gravel_roi=(0.1, 0.4, 0.9, 0.9), number_of_patches=2, always_apply=False, p=0.5) [view source on GitHub]

Add gravels.

From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

Parameters:

Name Type Description
gravel_roi Tuple[float, float, float, float]

(top-left x, top-left y, bottom-right x, bottom right y). Should be in [0, 1] range

number_of_patches int

no. of gravel patches required

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class RandomGravel(ImageOnlyTransform):
    """Add gravels.

    From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

    Args:
        gravel_roi: (top-left x, top-left y,
            bottom-right x, bottom right y). Should be in [0, 1] range
        number_of_patches: no. of gravel patches required
        p: probability of applying the transform. Default: 0.5.

    Targets:
        image

    Image types:
        uint8, float32

    """

    class InitSchema(BaseTransformInitSchema):
        gravel_roi: Tuple[float, float, float, float] = Field(
            default=(0.1, 0.4, 0.9, 0.9),
            description="Region of interest for gravel placement",
        )
        number_of_patches: int = Field(default=2, description="Number of gravel patches", ge=1)

        @model_validator(mode="after")
        def validate_gravel_roi(self) -> Self:
            # Both axes must satisfy 0 <= lower < upper <= 1.
            gravel_lower_x, gravel_lower_y, gravel_upper_x, gravel_upper_y = self.gravel_roi
            if not 0 <= gravel_lower_x < gravel_upper_x <= 1 or not 0 <= gravel_lower_y < gravel_upper_y <= 1:
                raise ValueError(f"Invalid gravel_roi. Got: {self.gravel_roi}.")
            return self

    def __init__(
        self,
        gravel_roi: Tuple[float, float, float, float] = (0.1, 0.4, 0.9, 0.9),
        number_of_patches: int = 2,
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply, p)
        self.gravel_roi = gravel_roi
        self.number_of_patches = number_of_patches

    def generate_gravel_patch(self, rectangular_roi: Tuple[int, int, int, int]) -> np.ndarray:
        """Sample gravel centre coordinates inside one rectangular patch.

        Args:
            rectangular_roi: patch corners as (x1, y1, x2, y2) in pixels.

        Returns:
            int64 array of shape (count, 2) with x in column 0 and y in column 1, where
            ``count`` is one gravel per 10 pixels of patch area.
        """
        x1, y1, x2, y2 = rectangular_roi
        area = abs((x2 - x1) * (y2 - y1))
        count = area // 10
        gravels = np.empty([count, 2], dtype=np.int64)
        gravels[:, 0] = random_utils.randint(x1, x2, count)
        gravels[:, 1] = random_utils.randint(y1, y2, count)
        return gravels

    def apply(self, img: np.ndarray, gravels_infos: Optional[List[Any]] = None, **params: Any) -> np.ndarray:
        """Apply transform on image; a missing ``gravels_infos`` is treated as empty."""
        if gravels_infos is None:
            gravels_infos = []
        return F.add_gravel(img, gravels_infos)

    @property
    def targets_as_params(self) -> List[str]:
        """Targets used to get params."""
        return ["image"]

    def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, np.ndarray]:
        """Sample gravel patches and the individual gravels inside them.

        Args:
            params: mapping that contains the input image under the ``"image"`` key.

        Returns:
            ``{"gravels_infos": array}`` where each row is
            (min_y, max_y, min_x, max_x, value) for one gravel.
        """
        img = params["image"]
        height, width = img.shape[:2]

        # Convert the relative ROI to pixel coordinates.
        x_min, y_min, x_max, y_max = self.gravel_roi
        x_min = int(x_min * width)
        x_max = int(x_max * width)
        y_min = int(y_min * height)
        y_max = int(y_max * height)

        # Upper bounds, in pixels, on a patch's extent along x and along y.
        max_patch_width = 200
        max_patch_height = 30

        rectangular_rois = np.zeros([self.number_of_patches, 4], dtype=np.int64)
        xx1 = random_utils.randint(x_min + 1, x_max, self.number_of_patches)  # xmax
        xx2 = random_utils.randint(x_min, xx1)  # xmin
        yy1 = random_utils.randint(y_min + 1, y_max, self.number_of_patches)  # ymax
        yy2 = random_utils.randint(y_min, yy1)  # ymin

        rectangular_rois[:, 0] = xx2
        rectangular_rois[:, 1] = yy2
        rectangular_rois[:, 2] = np.minimum(xx1, xx2 + max_patch_width)
        rectangular_rois[:, 3] = np.minimum(yy1, yy2 + max_patch_height)

        minx = []
        maxx = []
        miny = []
        maxy = []
        val = []
        for roi in rectangular_rois:
            gravels = self.generate_gravel_patch(roi)
            x = gravels[:, 0]
            y = gravels[:, 1]
            r = random_utils.randint(1, 4, len(gravels))
            sat = random_utils.randint(0, 255, len(gravels))
            # Clamp each gravel's box to the image. The upper bounds used to be
            # min(y + r, y) / min(x + r, x), which always collapse to y / x and never
            # clamp anything; the image extent is the intended limit.
            miny.append(np.maximum(y - r, 0))
            maxy.append(np.minimum(y + r, height - 1))
            minx.append(np.maximum(x - r, 0))
            maxx.append(np.minimum(x + r, width - 1))
            val.append(sat)

        return {
            "gravels_infos": np.stack(
                [
                    np.concatenate(miny),
                    np.concatenate(maxy),
                    np.concatenate(minx),
                    np.concatenate(maxx),
                    np.concatenate(val),
                ],
                1,
            ),
        }

    def get_transform_init_args_names(self) -> Tuple[str, str]:
        """Return names of arguments that are used in the init method of the transform."""
        return "gravel_roi", "number_of_patches"

targets_as_params: List[str] property readonly

Targets used to get params

apply (self, img, gravels_infos=None, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, gravels_infos: Optional[List[Any]] = None, **params: Any) -> np.ndarray:
    """Apply transform on image; a missing ``gravels_infos`` is treated as empty."""
    gravel_rows = [] if gravels_infos is None else gravels_infos
    return F.add_gravel(img, gravel_rows)

get_params_dependent_on_targets (self, params)

Returns parameters dependent on targets. Dependent target is defined in self.targets_as_params

Source code in albumentations/augmentations/transforms.py
Python
def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, np.ndarray]:
    """Sample gravel patches and the individual gravels inside them.

    Args:
        params: mapping that contains the input image under the ``"image"`` key.

    Returns:
        ``{"gravels_infos": array}`` where each row is
        (min_y, max_y, min_x, max_x, value) for one gravel.
    """
    img = params["image"]
    height, width = img.shape[:2]

    # Convert the relative ROI to pixel coordinates.
    x_min, y_min, x_max, y_max = self.gravel_roi
    x_min = int(x_min * width)
    x_max = int(x_max * width)
    y_min = int(y_min * height)
    y_max = int(y_max * height)

    # Upper bounds, in pixels, on a patch's extent along x and along y.
    max_patch_width = 200
    max_patch_height = 30

    rectangular_rois = np.zeros([self.number_of_patches, 4], dtype=np.int64)
    xx1 = random_utils.randint(x_min + 1, x_max, self.number_of_patches)  # xmax
    xx2 = random_utils.randint(x_min, xx1)  # xmin
    yy1 = random_utils.randint(y_min + 1, y_max, self.number_of_patches)  # ymax
    yy2 = random_utils.randint(y_min, yy1)  # ymin

    rectangular_rois[:, 0] = xx2
    rectangular_rois[:, 1] = yy2
    rectangular_rois[:, 2] = np.minimum(xx1, xx2 + max_patch_width)
    rectangular_rois[:, 3] = np.minimum(yy1, yy2 + max_patch_height)

    minx = []
    maxx = []
    miny = []
    maxy = []
    val = []
    for roi in rectangular_rois:
        gravels = self.generate_gravel_patch(roi)
        x = gravels[:, 0]
        y = gravels[:, 1]
        r = random_utils.randint(1, 4, len(gravels))
        sat = random_utils.randint(0, 255, len(gravels))
        # Clamp each gravel's box to the image. The upper bounds used to be
        # min(y + r, y) / min(x + r, x), which always collapse to y / x and never
        # clamp anything; the image extent is the intended limit.
        miny.append(np.maximum(y - r, 0))
        maxy.append(np.minimum(y + r, height - 1))
        minx.append(np.maximum(x - r, 0))
        maxx.append(np.minimum(x + r, width - 1))
        val.append(sat)

    return {
        "gravels_infos": np.stack(
            [
                np.concatenate(miny),
                np.concatenate(maxy),
                np.concatenate(minx),
                np.concatenate(maxx),
                np.concatenate(val),
            ],
            1,
        ),
    }

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str]:
    """Return names of arguments that are used in the init method of the transform."""
    init_arg_names = ("gravel_roi", "number_of_patches")
    return init_arg_names

class RandomGridShuffle (grid=(3, 3), always_apply=False, p=0.5) [view source on GitHub]

Randomly shuffles the grid's cells on an image, mask, or keypoints, effectively rearranging patches within the image. This transformation divides the image into a grid and then permutes these grid cells based on a random mapping.

Parameters:

Name Type Description
grid Tuple[int, int]

Size of the grid for splitting the image into cells. Each cell is shuffled randomly.

p float

Probability that the transform will be applied.

Targets

image, mask, keypoints

Image types: uint8, float32

Examples:

Python
>>> import albumentations as A
>>> transform = A.Compose([
    A.RandomGridShuffle(grid=(3, 3), always_apply=False, p=1.0)
])
>>> transformed = transform(image=my_image, mask=my_mask)
>>> image, mask = transformed['image'], transformed['mask']
# This will shuffle the 3x3 grid cells of `my_image` and `my_mask` randomly.
# Mask and image are shuffled in a consistent way

Note

This transform could be useful when only micro features are important for the model, and memorizing the global structure could be harmful. For example: (1) identifying the type of cell phone used to take a picture based on micro artifacts generated by phone post-processing algorithms, rather than the semantic features of the photo (see more at https://ieeexplore.ieee.org/abstract/document/8622031); (2) identifying stress, glucose, or hydration levels based on skin images.

Source code in albumentations/augmentations/transforms.py
Python
class RandomGridShuffle(DualTransform):
    """Split the input into a grid of cells and randomly permute those cells.

    The same tiles and the same permutation are applied to the image, the mask and the
    keypoints, so all targets stay consistent with each other.

    Args:
        grid (Tuple[int, int]): Size of the grid for splitting the image into cells. Each cell is shuffled randomly.
        p (float): Probability that the transform will be applied.

    Targets:
        image, mask, keypoints

    Image types:
        uint8, float32

    Examples:
        >>> import albumentations as A
        >>> transform = A.Compose([
        ...     A.RandomGridShuffle(grid=(3, 3), always_apply=False, p=1.0)
        ... ])
        >>> transformed = transform(image=my_image, mask=my_mask)
        >>> image, mask = transformed['image'], transformed['mask']
        >>> # The 3x3 grid cells of `my_image` and `my_mask` are shuffled randomly,
        >>> # and both are shuffled in a consistent way.

    Note:
        This transform could be useful when only micro features are important for the model, and
        memorizing the global structure could be harmful. For example:

        - Identifying the type of cell phone used to take a picture based on micro artifacts
          generated by phone post-processing algorithms, rather than the semantic features of
          the photo. See more at https://ieeexplore.ieee.org/abstract/document/8622031
        - Identifying stress, glucose, hydration levels based on skin images.
    """

    class InitSchema(BaseTransformInitSchema):
        grid: OnePlusIntRangeType = (3, 3)

    _targets = (Targets.IMAGE, Targets.MASK, Targets.KEYPOINTS)

    def __init__(self, grid: Tuple[int, int] = (3, 3), always_apply: bool = False, p: float = 0.5):
        super().__init__(always_apply=always_apply, p=p)
        self.grid = grid

    def apply(self, img: np.ndarray, tiles: np.ndarray, mapping: List[int], **params: Any) -> np.ndarray:
        """Apply transform on image by swapping its tiles according to ``mapping``."""
        shuffled = F.swap_tiles_on_image(img, tiles, mapping)
        return shuffled

    def apply_to_mask(self, mask: np.ndarray, tiles: np.ndarray, mapping: List[int], **params: Any) -> np.ndarray:
        """Swap the mask's tiles with the same ``tiles`` and ``mapping`` used for the image."""
        shuffled = F.swap_tiles_on_image(mask, tiles, mapping)
        return shuffled

    def apply_to_keypoint(
        self,
        keypoint: KeypointInternalType,
        tiles: np.ndarray,
        mapping: List[int],
        **params: Any,
    ) -> KeypointInternalType:
        """Move a keypoint along with the tile that contains it.

        The keypoint keeps its offset from the tile's top-left corner; only the tile origin
        changes. If no tile contains the keypoint, a RuntimeWarning is emitted and the
        keypoint is returned unchanged.
        """
        x, y = keypoint[:2]

        for source_idx, target_idx in enumerate(mapping):
            top, left, bottom, right = tiles[source_idx]
            # Tiles are half-open: the start edge is inside, the end edge is not.
            inside = top <= y < bottom and left <= x < right
            if not inside:
                continue

            target_top, target_left = tiles[target_idx][:2]
            shifted_x = (x - left) + target_left
            shifted_y = (y - top) + target_top
            return (shifted_x, shifted_y, *keypoint[2:])

        # Not expected to happen; warn instead of failing so it can be investigated.
        warn(
            "Keypoint not in any tile, returning it unchanged. This is unexpected and should be investigated.",
            RuntimeWarning,
        )
        return keypoint

    def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, np.ndarray]:
        """Build the tile grid for the image and a random permutation of its tiles.

        Returns:
            ``{"tiles": ..., "mapping": ...}`` as produced by ``F.split_uniform_grid`` and
            ``F.shuffle_tiles_within_shape_groups``.
        """
        image_shape = params["image"].shape[:2]
        height, width = image_shape
        rng = random_utils.get_random_state()

        # The same random state drives both the grid split and the shuffle.
        tiles = F.split_uniform_grid((height, width), self.grid, random_state=rng)
        groups = F.create_shape_groups(tiles)
        permutation = F.shuffle_tiles_within_shape_groups(groups, random_state=rng)

        return {"tiles": tiles, "mapping": permutation}

    @property
    def targets_as_params(self) -> List[str]:
        """Targets used to get params."""
        return ["image"]

    def get_transform_init_args_names(self) -> Tuple[str, ...]:
        """Return names of arguments that are used in the init method of the transform."""
        init_arg_names = ("grid",)
        return init_arg_names

targets_as_params: List[str] property readonly

Targets used to get params

apply (self, img, tiles, mapping, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, tiles: np.ndarray, mapping: List[int], **params: Any) -> np.ndarray:
    """Apply transform on image by swapping its tiles according to ``mapping``."""
    shuffled = F.swap_tiles_on_image(img, tiles, mapping)
    return shuffled

get_params_dependent_on_targets (self, params)

Returns parameters dependent on targets. Dependent target is defined in self.targets_as_params

Source code in albumentations/augmentations/transforms.py
Python
def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, np.ndarray]:
    """Build the tile grid for the image and a random permutation of its tiles.

    Returns:
        ``{"tiles": ..., "mapping": ...}`` as produced by ``F.split_uniform_grid`` and
        ``F.shuffle_tiles_within_shape_groups``.
    """
    image_shape = params["image"].shape[:2]
    height, width = image_shape
    rng = random_utils.get_random_state()

    # The same random state drives both the grid split and the shuffle.
    tiles = F.split_uniform_grid((height, width), self.grid, random_state=rng)
    groups = F.create_shape_groups(tiles)
    permutation = F.shuffle_tiles_within_shape_groups(groups, random_state=rng)

    return {"tiles": tiles, "mapping": permutation}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, ...]:
    """Return names of arguments that are used in the init method of the transform."""
    init_arg_names = ("grid",)
    return init_arg_names

class RandomRain (slant_lower=-10, slant_upper=10, drop_length=20, drop_width=1, drop_color=(200, 200, 200), blur_value=7, brightness_coefficient=0.7, rain_type=None, always_apply=False, p=0.5) [view source on GitHub]

Adds rain effects.

From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

Parameters:

Name Type Description
slant_lower int

should be in range [-20, 20].

slant_upper int

should be in range [-20, 20].

drop_length int

should be in range [0, 100].

drop_width int

should be in range [1, 5].

drop_color list of (r, g, b)

rain lines color.

blur_value int

rainy views are blurry

brightness_coefficient float

rainy days are usually shady. Should be in range [0, 1].

rain_type Optional[Literal['drizzle', 'heavy', 'torrential']]

One of [None, "drizzle", "heavy", "torrential"]

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class RandomRain(ImageOnlyTransform):
    """Adds rain effects.

    From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

    Args:
        slant_lower: should be in range [-20, 20].
        slant_upper: should be in range [-20, 20]. Must not be smaller than `slant_lower`.
        drop_length: should be in range [0, 100].
        drop_width: should be in range [1, 5].
        drop_color (list of (r, g, b)): rain lines color.
        blur_value (int): rainy views are blurry.
        brightness_coefficient (float): rainy days are usually shady. Should be in range [0, 1].
        rain_type: One of [None, "drizzle", "heavy", "torrential"]. A named rain type
            overrides `drop_length` and selects the drop density; None keeps `drop_length`.

    Targets:
        image

    Image types:
        uint8, float32

    """

    class InitSchema(BaseTransformInitSchema):
        slant_lower: int = Field(default=-10, description="Lower bound for rain slant angle", ge=-20, le=20)
        slant_upper: int = Field(default=10, description="Upper bound for rain slant angle", ge=-20, le=20)
        drop_length: int = Field(default=20, description="Length of raindrops", ge=0, le=100)
        drop_width: int = Field(default=1, description="Width of raindrops", ge=1, le=5)
        drop_color: Tuple[int, int, int] = Field(default=(200, 200, 200), description="Color of raindrops")
        blur_value: int = Field(default=7, description="Blur value for simulating rain effect", ge=0)
        brightness_coefficient: float = Field(
            default=0.7,
            description="Brightness coefficient for rainy effect",
            ge=0,
            le=1,
        )
        rain_type: Optional[RainMode] = Field(default=None, description="Type of rain to simulate")

        @model_validator(mode="after")
        def validate_slant_range_and_rain_type(self) -> Self:
            """Reject a slant range whose lower bound exceeds its upper bound."""
            # Equal bounds are a valid (fixed) slant, as the error message states.
            if self.slant_lower > self.slant_upper:
                msg = "slant_upper must be greater than or equal to slant_lower."
                raise ValueError(msg)
            return self

    def __init__(
        self,
        slant_lower: int = -10,
        slant_upper: int = 10,
        drop_length: int = 20,
        drop_width: int = 1,
        drop_color: Tuple[int, int, int] = (200, 200, 200),
        blur_value: int = 7,
        brightness_coefficient: float = 0.7,
        rain_type: Optional[RainMode] = None,
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        self.slant_lower = slant_lower
        self.slant_upper = slant_upper
        self.drop_length = drop_length
        self.drop_width = drop_width
        self.drop_color = drop_color
        self.blur_value = blur_value
        self.brightness_coefficient = brightness_coefficient
        self.rain_type = rain_type

    def apply(
        self,
        img: np.ndarray,
        slant: int = 10,
        drop_length: int = 20,
        rain_drops: Optional[List[Tuple[int, int]]] = None,
        **params: Any,
    ) -> np.ndarray:
        """Apply the transform to an image by delegating to ``F.add_rain``.

        ``rain_drops`` set to None is treated as an empty list.
        """
        if rain_drops is None:
            rain_drops = []
        return F.add_rain(
            img,
            slant,
            drop_length,
            self.drop_width,
            self.drop_color,
            self.blur_value,
            self.brightness_coefficient,
            rain_drops,
        )

    @property
    def targets_as_params(self) -> List[str]:
        """Names of the targets that are used to compute the transform parameters."""
        return ["image"]

    def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Sample the slant and the drop positions for the incoming image.

        Args:
            params: Target values keyed by name; only ``params["image"]`` is read.

        Returns:
            A dict with ``"drop_length"``, ``"slant"`` and ``"rain_drops"``
            (a list of ``(x, y)`` pairs).
        """
        img = params["image"]
        slant = int(random.uniform(self.slant_lower, self.slant_upper))

        height, width = img.shape[:2]
        area = height * width

        if self.rain_type == "drizzle":
            num_drops = area // 770
            drop_length = 10
        elif self.rain_type == "heavy":
            num_drops = area // 600
            drop_length = 30
        elif self.rain_type == "torrential":
            num_drops = area // 500
            drop_length = 60
        else:
            drop_length = self.drop_length
            num_drops = area // 600

        # Clamp the sampling ranges so that images narrower than the slant or
        # shorter than the drop length do not give randint an empty interval.
        max_x = max(width - slant, 0)
        max_y = max(height - drop_length, 0)

        rain_drops = []

        for _ in range(num_drops):  # If you want heavier rain, try increasing this
            x = random_utils.randint(slant, width + 1) if slant < 0 else random_utils.randint(0, max_x + 1)

            y = random_utils.randint(0, max_y + 1)

            rain_drops.append((x, y))

        return {"drop_length": drop_length, "slant": slant, "rain_drops": rain_drops}

    def get_transform_init_args_names(self) -> Tuple[str, ...]:
        """Return the names of the arguments used in the init method of the transform."""
        return (
            "slant_lower",
            "slant_upper",
            "drop_length",
            "drop_width",
            "drop_color",
            "blur_value",
            "brightness_coefficient",
            "rain_type",
        )

targets_as_params: List[str] property readonly

Targets used to get params

apply (self, img, slant=10, drop_length=20, rain_drops=None, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(
    self,
    img: np.ndarray,
    slant: int = 10,
    drop_length: int = 20,
    rain_drops: Optional[List[Tuple[int, int]]] = None,
    **params: Any,
) -> np.ndarray:
    """Apply the transform to an image.

    Args:
        img: Input image.
        slant: Slant forwarded to ``F.add_rain``.
        drop_length: Drop length forwarded to ``F.add_rain``.
        rain_drops: ``(x, y)`` pairs for the drops; None is treated as an empty list.
        **params: Extra parameters; not used here.

    Returns:
        The result of ``F.add_rain``.
    """
    if rain_drops is None:
        rain_drops = []
    return F.add_rain(
        img,
        slant,
        drop_length,
        self.drop_width,
        self.drop_color,
        self.blur_value,
        self.brightness_coefficient,
        rain_drops,
    )

get_params_dependent_on_targets (self, params)

Returns parameters dependent on targets. Dependent target is defined in self.targets_as_params

Source code in albumentations/augmentations/transforms.py
Python
def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
    """Sample the slant and the drop positions for the incoming image.

    Args:
        params: Target values keyed by name; only ``params["image"]`` is read.

    Returns:
        A dict with ``"drop_length"``, ``"slant"`` and ``"rain_drops"``
        (a list of ``(x, y)`` pairs).
    """
    img = params["image"]
    slant = int(random.uniform(self.slant_lower, self.slant_upper))

    height, width = img.shape[:2]
    area = height * width

    if self.rain_type == "drizzle":
        num_drops = area // 770
        drop_length = 10
    elif self.rain_type == "heavy":
        num_drops = area // 600
        drop_length = 30
    elif self.rain_type == "torrential":
        num_drops = area // 500
        drop_length = 60
    else:
        drop_length = self.drop_length
        num_drops = area // 600

    # Clamp the sampling ranges so that images narrower than the slant or
    # shorter than the drop length do not give randint an empty interval.
    max_x = max(width - slant, 0)
    max_y = max(height - drop_length, 0)

    rain_drops = []

    for _ in range(num_drops):  # If you want heavier rain, try increasing this
        x = random_utils.randint(slant, width + 1) if slant < 0 else random_utils.randint(0, max_x + 1)

        y = random_utils.randint(0, max_y + 1)

        rain_drops.append((x, y))

    return {"drop_length": drop_length, "slant": slant, "rain_drops": rain_drops}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, ...]:
    """Return the names of the arguments used in the init method of the transform."""
    return (
        "slant_lower",
        "slant_upper",
        "drop_length",
        "drop_width",
        "drop_color",
        "blur_value",
        "brightness_coefficient",
        "rain_type",
    )

class RandomShadow (shadow_roi=(0, 0.5, 1, 1), num_shadows_limit=(1, 2), num_shadows_lower=None, num_shadows_upper=None, shadow_dimension=5, always_apply=False, p=0.5) [view source on GitHub]

Simulates shadows for the image

Parameters:

Name Type Description
shadow_roi Tuple[float, float, float, float]

region of the image where shadows will appear. All values should be in range [0, 1].

num_shadows_limit Tuple[int, int]

Lower and upper limits for the possible number of shadows.

shadow_dimension int

number of edges in the shadow polygons

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class RandomShadow(ImageOnlyTransform):
    """Simulates shadows for the image

    Args:
        shadow_roi: region of the image where shadows
            will appear. All values should be in range [0, 1].
        num_shadows_limit: Lower and upper limits for the possible number of shadows.
        num_shadows_lower: Deprecated, use `num_shadows_limit` instead.
        num_shadows_upper: Deprecated, use `num_shadows_limit` instead.
        shadow_dimension: number of edges in the shadow polygons

    Targets:
        image

    Image types:
        uint8, float32

    Reference:
        https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library
    """

    class InitSchema(BaseTransformInitSchema):
        shadow_roi: Tuple[float, float, float, float] = Field(
            default=(0, 0.5, 1, 1),
            description="Region of the image where shadows will appear",
        )
        num_shadows_limit: Tuple[int, int] = Field(default=(1, 2))
        num_shadows_lower: Optional[int] = Field(
            default=None,
            description="Lower limit for the possible number of shadows",
        )
        num_shadows_upper: Optional[int] = Field(
            default=None,
            description="Upper limit for the possible number of shadows",
        )
        shadow_dimension: int = Field(default=5, description="Number of edges in the shadow polygons", gt=0)

        @model_validator(mode="after")
        def validate_shadows(self) -> Self:
            """Fold the deprecated bounds into `num_shadows_limit`, then validate the limits and the ROI."""
            # Resolve the deprecated arguments first so the ordering check below
            # applies to the limits that will actually be used.
            if self.num_shadows_lower is not None or self.num_shadows_upper is not None:
                warn(
                    "`num_shadows_lower` and `num_shadows_upper` are deprecated. "
                    "Use `num_shadows_limit` as tuple (num_shadows_lower, num_shadows_upper) instead.",
                    DeprecationWarning,
                    stacklevel=2,
                )
                # When only one deprecated bound is given, keep the other bound
                # from `num_shadows_limit` instead of storing None in the tuple.
                lower = self.num_shadows_limit[0] if self.num_shadows_lower is None else self.num_shadows_lower
                upper = self.num_shadows_limit[1] if self.num_shadows_upper is None else self.num_shadows_upper
                self.num_shadows_limit = (lower, upper)
                self.num_shadows_lower = None
                self.num_shadows_upper = None

            if self.num_shadows_limit[0] > self.num_shadows_limit[1]:
                msg = "num_shadows_limit[0] must be less than or equal to num_shadows_limit[1]."
                raise ValueError(msg)

            shadow_lower_x, shadow_lower_y, shadow_upper_x, shadow_upper_y = self.shadow_roi

            if not 0 <= shadow_lower_x <= shadow_upper_x <= 1 or not 0 <= shadow_lower_y <= shadow_upper_y <= 1:
                raise ValueError(f"Invalid shadow_roi. Got: {self.shadow_roi}")

            return self

    def __init__(
        self,
        shadow_roi: Tuple[float, float, float, float] = (0, 0.5, 1, 1),
        num_shadows_limit: Tuple[int, int] = (1, 2),
        num_shadows_lower: Optional[int] = None,
        num_shadows_upper: Optional[int] = None,
        shadow_dimension: int = 5,
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)

        self.shadow_roi = shadow_roi
        self.shadow_dimension = shadow_dimension
        self.num_shadows_limit = num_shadows_limit

    def apply(self, img: np.ndarray, vertices_list: Optional[List[np.ndarray]] = None, **params: Any) -> np.ndarray:
        """Apply the transform to an image by delegating to ``F.add_shadow``.

        ``vertices_list`` set to None is treated as an empty list.
        """
        if vertices_list is None:
            vertices_list = []
        return F.add_shadow(img, vertices_list)

    @property
    def targets_as_params(self) -> List[str]:
        """Names of the targets that are used to compute the transform parameters."""
        return ["image"]

    def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, List[np.ndarray]]:
        """Sample the shadow polygons for the incoming image.

        Args:
            params: Target values keyed by name; only ``params["image"]`` is read.

        Returns:
            A dict with ``"vertices_list"``: one array per shadow, holding
            `shadow_dimension` sampled x and y values stacked along axis 1.
        """
        img = params["image"]
        height, width = img.shape[:2]

        num_shadows = random_utils.randint(self.num_shadows_limit[0], self.num_shadows_limit[1] + 1)

        x_min, y_min, x_max, y_max = self.shadow_roi

        # Convert the relative ROI into pixel coordinates.
        x_min = int(x_min * width)
        x_max = int(x_max * width)
        y_min = int(y_min * height)
        y_max = int(y_max * height)

        # Honour the configured number of polygon vertices instead of a fixed 5.
        vertices_list = [
            np.stack(
                [
                    random_utils.randint(x_min, x_max, size=self.shadow_dimension),
                    random_utils.randint(y_min, y_max, size=self.shadow_dimension),
                ],
                axis=1,
            )
            for _ in range(num_shadows)
        ]

        return {"vertices_list": vertices_list}

    def get_transform_init_args_names(self) -> Tuple[str, ...]:
        """Return the names of the arguments used in the init method of the transform."""
        return (
            "shadow_roi",
            "num_shadows_limit",
            "shadow_dimension",
        )

targets_as_params: List[str] property readonly

Targets used to get params

apply (self, img, vertices_list=None, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, vertices_list: Optional[List[np.ndarray]] = None, **params: Any) -> np.ndarray:
    """Apply the transform to an image.

    Args:
        img: Input image.
        vertices_list: Shadow polygon vertices forwarded to ``F.add_shadow``;
            None is treated as an empty list.
        **params: Extra parameters; not used here.

    Returns:
        The result of ``F.add_shadow(img, vertices_list)``.
    """
    if vertices_list is None:
        vertices_list = []
    return F.add_shadow(img, vertices_list)

get_params_dependent_on_targets (self, params)

Returns parameters dependent on targets. Dependent target is defined in self.targets_as_params

Source code in albumentations/augmentations/transforms.py
Python
def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, List[np.ndarray]]:
    """Sample the shadow polygons for the incoming image.

    Args:
        params: Target values keyed by name; only ``params["image"]`` is read.

    Returns:
        A dict with ``"vertices_list"``: one array per shadow, holding
        ``self.shadow_dimension`` sampled x and y values stacked along axis 1.
    """
    img = params["image"]
    height, width = img.shape[:2]

    num_shadows = random_utils.randint(self.num_shadows_limit[0], self.num_shadows_limit[1] + 1)

    x_min, y_min, x_max, y_max = self.shadow_roi

    # Convert the relative ROI into pixel coordinates.
    x_min = int(x_min * width)
    x_max = int(x_max * width)
    y_min = int(y_min * height)
    y_max = int(y_max * height)

    # Honour the configured number of polygon vertices instead of a fixed 5.
    vertices_list = [
        np.stack(
            [
                random_utils.randint(x_min, x_max, size=self.shadow_dimension),
                random_utils.randint(y_min, y_max, size=self.shadow_dimension),
            ],
            axis=1,
        )
        for _ in range(num_shadows)
    ]

    return {"vertices_list": vertices_list}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, ...]:
    """Return the names of the arguments used in the init method of the transform."""
    return (
        "shadow_roi",
        "num_shadows_limit",
        "shadow_dimension",
    )

class RandomSnow (snow_point_lower=0.1, snow_point_upper=0.3, brightness_coeff=2.5, always_apply=False, p=0.5) [view source on GitHub]

Bleach out some pixel values simulating snow.

From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

Parameters:

Name Type Description
snow_point_lower float

lower bound of the amount of snow. Should be in [0, 1] range

snow_point_upper float

upper bound of the amount of snow. Should be in [0, 1] range

brightness_coeff float

a larger number will lead to more snow on the image. Should be >= 0

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class RandomSnow(ImageOnlyTransform):
    """Bleach out some pixel values simulating snow.

    From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

    Args:
        snow_point_lower: lower bound of the amount of snow. Should be in [0, 1] range
        snow_point_upper: upper bound of the amount of snow. Should be in [0, 1] range
            and not smaller than `snow_point_lower`.
        brightness_coeff: a larger number will lead to more snow on the image. Should be >= 0

    Targets:
        image

    Image types:
        uint8, float32

    """

    class InitSchema(BaseTransformInitSchema):
        snow_point_lower: float = Field(default=0.1, description="Lower bound of the amount of snow", ge=0, le=1)
        snow_point_upper: float = Field(default=0.3, description="Upper bound of the amount of snow", ge=0, le=1)
        brightness_coeff: float = Field(default=2.5, description="Brightness coefficient, must be >= 0", ge=0)

        @model_validator(mode="after")
        def validate_snow_points(self) -> Self:
            """Reject a snow range whose lower bound exceeds its upper bound."""
            if self.snow_point_lower > self.snow_point_upper:
                msg = "snow_point_lower must be less than or equal to snow_point_upper."
                raise ValueError(msg)
            return self

    def __init__(
        self,
        snow_point_lower: float = 0.1,
        snow_point_upper: float = 0.3,
        brightness_coeff: float = 2.5,
        always_apply: bool = False,
        p: float = 0.5,
    ):
        # Keyword arguments, matching how the sibling transforms call the base class.
        super().__init__(always_apply=always_apply, p=p)

        self.snow_point_lower = snow_point_lower
        self.snow_point_upper = snow_point_upper
        self.brightness_coeff = brightness_coeff

    def apply(self, img: np.ndarray, snow_point: float = 0.1, **params: Any) -> np.ndarray:
        """Apply the transform to an image by delegating to ``F.add_snow``."""
        return F.add_snow(img, snow_point, self.brightness_coeff)

    def get_params(self) -> Dict[str, float]:
        """Sample ``snow_point`` uniformly between `snow_point_lower` and `snow_point_upper`."""
        return {"snow_point": random.uniform(self.snow_point_lower, self.snow_point_upper)}

    def get_transform_init_args_names(self) -> Tuple[str, str, str]:
        """Return the names of the arguments used in the init method of the transform."""
        return ("snow_point_lower", "snow_point_upper", "brightness_coeff")

apply (self, img, snow_point=0.1, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, snow_point: float = 0.1, **params: Any) -> np.ndarray:
    """Apply the transform to an image.

    Args:
        img: Input image.
        snow_point: Snow amount forwarded to ``F.add_snow``.
        **params: Extra parameters; not used here.

    Returns:
        The result of ``F.add_snow(img, snow_point, self.brightness_coeff)``.
    """
    return F.add_snow(img, snow_point, self.brightness_coeff)

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, float]:
    """Return parameters independent of the input.

    Returns:
        A dict with ``"snow_point"`` sampled uniformly between
        ``self.snow_point_lower`` and ``self.snow_point_upper``.
    """
    return {"snow_point": random.uniform(self.snow_point_lower, self.snow_point_upper)}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str, str]:
    """Return the names of the arguments used in the init method of the transform."""
    return ("snow_point_lower", "snow_point_upper", "brightness_coeff")

class RandomSunFlare (flare_roi=(0, 0, 1, 0.5), angle_lower=0, angle_upper=1, num_flare_circles_lower=6, num_flare_circles_upper=10, src_radius=400, src_color=(255, 255, 255), always_apply=False, p=0.5) [view source on GitHub]

Simulates Sun Flare for the image

From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

Parameters:

Name Type Description
flare_roi Tuple[float, float, float, float]

region of the image where flare will appear (x_min, y_min, x_max, y_max). All values should be in range [0, 1].

angle_lower float

should be in range [0, angle_upper].

angle_upper float

should be in range [angle_lower, 1].

num_flare_circles_lower int

lower limit for the number of flare circles. Should be in range [0, num_flare_circles_upper].

num_flare_circles_upper int

upper limit for the number of flare circles. Should be in range [num_flare_circles_lower, inf].

src_radius int
src_color Tuple[int, int, int]

color of the flare

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class RandomSunFlare(ImageOnlyTransform):
    """Simulates Sun Flare for the image

    From https://github.com/UjjwalSaxena/Automold--Road-Augmentation-Library

    Args:
        flare_roi: region of the image where flare will appear (x_min, y_min, x_max, y_max).
            All values should be in range [0, 1].
        angle_lower: lower bound for the angle. Should be in range [0, `angle_upper`).
        angle_upper: upper bound for the angle. Should be in range (`angle_lower`, 1].
        num_flare_circles_lower: lower limit for the number of flare circles.
            Should be non-negative and smaller than `num_flare_circles_upper`.
        num_flare_circles_upper: upper limit for the number of flare circles.
            Should be greater than `num_flare_circles_lower`.
        src_radius: source radius for the flare, forwarded to `F.add_sun_flare`.
        src_color: color of the flare

    Targets:
        image

    Image types:
        uint8, float32

    """

    class InitSchema(BaseTransformInitSchema):
        flare_roi: Tuple[float, float, float, float] = Field(
            default=(0, 0, 1, 0.5),
            description="Region of the image where flare will appear",
        )
        angle_lower: float = Field(default=0, description="Lower bound for the angle", ge=0, le=1)
        angle_upper: float = Field(default=1, description="Upper bound for the angle", ge=0, le=1)
        num_flare_circles_lower: int = Field(
            default=6,
            description="Lower limit for the number of flare circles",
            ge=0,
        )
        num_flare_circles_upper: int = Field(
            default=10,
            description="Upper limit for the number of flare circles",
            gt=0,
        )
        src_radius: int = Field(default=400, description="Source radius for the flare")
        src_color: Tuple[int, int, int] = Field(default=(255, 255, 255), description="Color of the flare")

        @model_validator(mode="after")
        def validate_parameters(self) -> Self:
            """Check the ROI, the angle range and the circle-count range."""
            roi_x_min, roi_y_min, roi_x_max, roi_y_max = self.flare_roi
            x_range_ok = 0 <= roi_x_min < roi_x_max <= 1
            y_range_ok = 0 <= roi_y_min < roi_y_max <= 1
            if not (x_range_ok and y_range_ok):
                raise ValueError(f"Invalid flare_roi. Got: {self.flare_roi}")

            if self.angle_lower >= self.angle_upper:
                raise ValueError(
                    f"angle_upper must be greater than angle_lower. Got: {self.angle_lower}, {self.angle_upper}",
                )

            if self.num_flare_circles_lower >= self.num_flare_circles_upper:
                msg = "num_flare_circles_upper must be greater than num_flare_circles_lower."
                raise ValueError(msg)

            return self

    def __init__(
        self,
        flare_roi: Tuple[float, float, float, float] = (0, 0, 1, 0.5),
        angle_lower: float = 0,
        angle_upper: float = 1,
        num_flare_circles_lower: int = 6,
        num_flare_circles_upper: int = 10,
        src_radius: int = 400,
        src_color: Tuple[int, int, int] = (255, 255, 255),
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)

        self.flare_roi = flare_roi
        self.angle_lower = angle_lower
        self.angle_upper = angle_upper
        self.num_flare_circles_lower = num_flare_circles_lower
        self.num_flare_circles_upper = num_flare_circles_upper
        self.src_radius = src_radius
        self.src_color = src_color

    def apply(
        self,
        img: np.ndarray,
        flare_center_x: float = 0.5,
        flare_center_y: float = 0.5,
        circles: Optional[List[Any]] = None,
        **params: Any,
    ) -> np.ndarray:
        """Apply the transform to an image by delegating to ``F.add_sun_flare``.

        ``circles`` set to None is treated as an empty list.
        """
        flare_circles = [] if circles is None else circles
        return F.add_sun_flare(
            img,
            flare_center_x,
            flare_center_y,
            self.src_radius,
            self.src_color,
            flare_circles,
        )

    @property
    def targets_as_params(self) -> List[str]:
        """Names of the targets that are used to compute the transform parameters."""
        return ["image"]

    def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Sample the flare centre and the flare circles for the incoming image.

        Args:
            params: Target values keyed by name; only ``params["image"]`` is read.

        Returns:
            A dict with ``"circles"`` (a list of
            ``(alpha, (x, y), radius, (r, g, b))`` tuples), ``"flare_center_x"``
            and ``"flare_center_y"`` (pixel coordinates).
        """
        height, width = params["image"].shape[:2]

        angle = 2 * math.pi * random.uniform(self.angle_lower, self.angle_upper)

        roi_x_min, roi_y_min, roi_x_max, roi_y_max = self.flare_roi
        # Draw x before y, then scale the relative position to pixels.
        center_x = int(width * random.uniform(roi_x_min, roi_x_max))
        center_y = int(height * random.uniform(roi_y_min, roi_y_max))

        num_circles = random_utils.randint(self.num_flare_circles_lower, self.num_flare_circles_upper + 1)

        # Candidate circle centres: points on the line through the flare centre
        # at the sampled angle, taken every 10 steps of the line parameter.
        cos_a = math.cos(angle)
        sin_a = math.sin(angle)
        line_points = [
            (center_x + step * cos_a, center_y + step * sin_a) for step in range(-center_x, width - center_x, 10)
        ]

        def shade(channel: int) -> int:
            # Random value in [channel - 50, channel], floored at 0.
            return random.randint(max(channel - 50, 0), channel)

        circles = []
        for _ in range(num_circles):
            alpha = random_utils.uniform(0.05, 0.2)
            point_idx = random_utils.randint(0, len(line_points))
            rad = random.randint(1, max(height // 100 - 2, 2))
            color = (shade(self.src_color[0]), shade(self.src_color[1]), shade(self.src_color[2]))

            point_x, point_y = line_points[point_idx]
            circles.append((alpha, (int(point_x), int(point_y)), rad**3, color))

        return {
            "circles": circles,
            "flare_center_x": center_x,
            "flare_center_y": center_y,
        }

    def get_transform_init_args(self) -> Dict[str, Any]:
        """Return the init arguments of the transform with their current values."""
        names = (
            "flare_roi",
            "angle_lower",
            "angle_upper",
            "num_flare_circles_lower",
            "num_flare_circles_upper",
            "src_radius",
            "src_color",
        )
        return {name: getattr(self, name) for name in names}

targets_as_params: List[str] property readonly

Targets used to get params

apply (self, img, flare_center_x=0.5, flare_center_y=0.5, circles=None, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(
    self,
    img: np.ndarray,
    flare_center_x: float = 0.5,
    flare_center_y: float = 0.5,
    circles: Optional[List[Any]] = None,
    **params: Any,
) -> np.ndarray:
    """Apply the transform to an image.

    Args:
        img: Input image.
        flare_center_x: X coordinate of the flare centre, forwarded to ``F.add_sun_flare``.
        flare_center_y: Y coordinate of the flare centre, forwarded to ``F.add_sun_flare``.
        circles: Flare circles forwarded to ``F.add_sun_flare``; None is treated as an empty list.
        **params: Extra parameters; not used here.

    Returns:
        The result of ``F.add_sun_flare``.
    """
    if circles is None:
        circles = []
    return F.add_sun_flare(
        img,
        flare_center_x,
        flare_center_y,
        self.src_radius,
        self.src_color,
        circles,
    )

get_params_dependent_on_targets (self, params)

Returns parameters dependent on targets. Dependent target is defined in self.targets_as_params

Source code in albumentations/augmentations/transforms.py
Python
def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
    """Sample the flare centre and the flare circles for the incoming image.

    Args:
        params: Target values keyed by name; only ``params["image"]`` is read.

    Returns:
        A dict with ``"circles"`` (a list of
        ``(alpha, (x, y), radius, (r, g, b))`` tuples), ``"flare_center_x"``
        and ``"flare_center_y"`` (pixel coordinates).
    """
    height, width = params["image"].shape[:2]

    angle = 2 * math.pi * random.uniform(self.angle_lower, self.angle_upper)

    roi_x_min, roi_y_min, roi_x_max, roi_y_max = self.flare_roi
    # Draw x before y, then scale the relative position to pixels.
    center_x = int(width * random.uniform(roi_x_min, roi_x_max))
    center_y = int(height * random.uniform(roi_y_min, roi_y_max))

    num_circles = random_utils.randint(self.num_flare_circles_lower, self.num_flare_circles_upper + 1)

    # Candidate circle centres: points on the line through the flare centre
    # at the sampled angle, taken every 10 steps of the line parameter.
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    line_points = [
        (center_x + step * cos_a, center_y + step * sin_a) for step in range(-center_x, width - center_x, 10)
    ]

    def shade(channel: int) -> int:
        # Random value in [channel - 50, channel], floored at 0.
        return random.randint(max(channel - 50, 0), channel)

    circles = []
    for _ in range(num_circles):
        alpha = random_utils.uniform(0.05, 0.2)
        point_idx = random_utils.randint(0, len(line_points))
        rad = random.randint(1, max(height // 100 - 2, 2))
        color = (shade(self.src_color[0]), shade(self.src_color[1]), shade(self.src_color[2]))

        point_x, point_y = line_points[point_idx]
        circles.append((alpha, (int(point_x), int(point_y)), rad**3, color))

    return {
        "circles": circles,
        "flare_center_x": center_x,
        "flare_center_y": center_y,
    }

class RandomToneCurve (scale=0.1, always_apply=False, p=0.5) [view source on GitHub]

Randomly change the relationship between bright and dark areas of the image by manipulating its tone curve.

Parameters:

Name Type Description
scale float

standard deviation of the normal distribution. Used to sample random distances to move two control points that modify the image's curve. Values should be in range [0, 1]. Default: 0.1

Targets

image

Image types: uint8

Source code in albumentations/augmentations/transforms.py
Python
class RandomToneCurve(ImageOnlyTransform):
    """Randomly change the relationship between bright and dark areas of the image by manipulating its tone curve.

    Args:
        scale: standard deviation of the normal distribution.
            Used to sample random distances to move two control points that modify the image's curve.
            Values should be in range [0, 1]. Default: 0.1

    Targets:
        image

    Image types:
        uint8

    """

    class InitSchema(BaseTransformInitSchema):
        scale: float = Field(
            default=0.1,
            description="Standard deviation of the normal distribution used to sample random distances",
            ge=0,
            le=1,
        )

    def __init__(self, scale: float = 0.1, always_apply: bool = False, p: float = 0.5):
        super().__init__(always_apply=always_apply, p=p)
        self.scale = scale

    def apply(self, img: np.ndarray, low_y: float, high_y: float, **params: Any) -> np.ndarray:
        """Apply the transform to an image by delegating to ``F.move_tone_curve``."""
        return F.move_tone_curve(img, low_y, high_y)

    def get_params(self) -> Dict[str, float]:
        """Sample ``low_y`` around 0.25 and ``high_y`` around 0.75, each clipped to [0, 1]."""
        # Draw the low value first, then the high value.
        low_sample = random_utils.normal(loc=0.25, scale=self.scale)
        high_sample = random_utils.normal(loc=0.75, scale=self.scale)
        return {
            "low_y": np.clip(low_sample, 0, 1),
            "high_y": np.clip(high_sample, 0, 1),
        }

    def get_transform_init_args_names(self) -> Tuple[str]:
        """Return the names of the arguments used in the init method of the transform."""
        return ("scale",)

apply (self, img, low_y, high_y, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, low_y: float, high_y: float, **params: Any) -> np.ndarray:
    """Apply the transform to an image.

    Args:
        img: Input image.
        low_y: Low control value forwarded to ``F.move_tone_curve``.
        high_y: High control value forwarded to ``F.move_tone_curve``.
        **params: Extra parameters; not used here.

    Returns:
        The result of ``F.move_tone_curve(img, low_y, high_y)``.
    """
    return F.move_tone_curve(img, low_y, high_y)

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, float]:
    """Return parameters independent of the input.

    Returns:
        A dict with ``"low_y"`` sampled from a normal distribution centred at
        0.25 and ``"high_y"`` from one centred at 0.75, both with standard
        deviation ``self.scale`` and clipped to [0, 1].
    """
    # Draw the low value first, then the high value.
    low_sample = random_utils.normal(loc=0.25, scale=self.scale)
    high_sample = random_utils.normal(loc=0.75, scale=self.scale)
    return {
        "low_y": np.clip(low_sample, 0, 1),
        "high_y": np.clip(high_sample, 0, 1),
    }

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str]:
    """Return the names of the arguments used in the init method of the transform."""
    return ("scale",)

class RingingOvershoot (blur_limit=(7, 15), cutoff=(0.7853981633974483, 1.5707963267948966), always_apply=False, p=0.5) [view source on GitHub]

Create ringing or overshoot artefacts by convolving image with 2D sinc filter.

Parameters:

Name Type Description
blur_limit Union[int, Tuple[int, int]]

maximum kernel size for sinc filter. Should be in range [3, inf). Default: (7, 15).

cutoff Union[float, Tuple[float, float]]

range to choose the cutoff frequency in radians. Should be in range (0, np.pi) Default: (np.pi / 4, np.pi / 2).

p float

probability of applying the transform. Default: 0.5.

Reference

dsp.stackexchange.com/questions/58301/2-d-circularly-symmetric-low-pass-filter https://arxiv.org/abs/2107.10833

Targets

image

Source code in albumentations/augmentations/transforms.py
Python
class RingingOvershoot(ImageOnlyTransform):
    """Create ringing or overshoot artefacts by convolving image with 2D sinc filter.

    Args:
        blur_limit: maximum kernel size for sinc filter.
            Should be in range [3, inf). Default: (7, 15).
        cutoff: range to choose the cutoff frequency in radians.
            Should be in range (0, np.pi)
            Default: (np.pi / 4, np.pi / 2).
        p: probability of applying the transform. Default: 0.5.

    Reference:
        dsp.stackexchange.com/questions/58301/2-d-circularly-symmetric-low-pass-filter
        https://arxiv.org/abs/2107.10833

    Targets:
        image

    """

    class InitSchema(BlurInitSchema):
        blur_limit: ScaleIntType = Field(default=(7, 15), description="Maximum kernel size for sinc filter.")
        cutoff: ScaleFloatType = Field(default=(np.pi / 4, np.pi / 2), description="Cutoff frequency range in radians.")

        @field_validator("cutoff")
        @classmethod
        def check_cutoff(cls, v: ScaleFloatType, info: ValidationInfo) -> Tuple[float, float]:
            """Convert ``cutoff`` with ``to_tuple`` and check it against the bounds (0, pi)."""
            bounds = 0, np.pi
            result = to_tuple(v, v)
            check_range(result, *bounds, info.field_name)
            return result

    def __init__(
        self,
        blur_limit: ScaleIntType = (7, 15),
        cutoff: ScaleFloatType = (np.pi / 4, np.pi / 2),
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        self.blur_limit = cast(Tuple[int, int], blur_limit)
        self.cutoff = cast(Tuple[float, float], cutoff)

    def get_params(self) -> Dict[str, np.ndarray]:
        """Sample a kernel size and cutoff, then build the normalized 2D sinc kernel.

        Returns:
            Dict with a single key ``kernel``: a ``(ksize, ksize)`` array whose entries sum to one.

        Raises:
            ValueError: if the sampled kernel size is even. The size is stepped by 2 from
                ``blur_limit[0]``, so this happens exactly when ``blur_limit[0]`` is even.

        """
        ksize = random.randrange(self.blur_limit[0], self.blur_limit[1] + 1, 2)
        if ksize % 2 == 0:
            raise ValueError(f"Kernel size must be odd. Got: {ksize}")

        cutoff = random.uniform(*self.cutoff)
        center = (ksize - 1) / 2

        def sinc_2d(x: np.ndarray, y: np.ndarray) -> np.ndarray:
            # Distance of every grid point from the kernel centre, computed once.
            radius = np.sqrt((x - center) ** 2 + (y - center) ** 2)
            return cutoff * special.j1(cutoff * radius) / (2 * np.pi * radius)

        # From dsp.stackexchange.com/questions/58301/2-d-circularly-symmetric-low-pass-filter
        # The centre element evaluates to 0 / 0, hence the suppressed warnings.
        with np.errstate(divide="ignore", invalid="ignore"):
            kernel = np.fromfunction(sinc_2d, [ksize, ksize])
        # Replace the undefined centre with the limit of the expression for radius -> 0.
        kernel[(ksize - 1) // 2, (ksize - 1) // 2] = cutoff**2 / (4 * np.pi)

        # Normalize kernel
        kernel = kernel.astype(np.float32) / np.sum(kernel)

        return {"kernel": kernel}

    def apply(self, img: np.ndarray, kernel: Optional[np.ndarray] = None, **params: Any) -> np.ndarray:
        """Convolve ``img`` with the kernel produced by ``get_params`` via ``F.convolve``."""
        return F.convolve(img, kernel)

    def get_transform_init_args_names(self) -> Tuple[str, str]:
        """Return the names of the ``__init__`` arguments of this transform."""
        return ("blur_limit", "cutoff")

apply (self, img, kernel=None, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, kernel: Optional[np.ndarray] = None, **params: Any) -> np.ndarray:
    """Convolve ``img`` with ``kernel`` via ``F.convolve``.

    ``kernel`` is the array returned under the ``kernel`` key by ``get_params``,
    so it is annotated as an ndarray rather than an int.
    """
    return F.convolve(img, kernel)

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, np.ndarray]:
    """Sample a kernel size and cutoff, then build the normalized 2D sinc kernel.

    Returns:
        Dict with a single key ``kernel``: a ``(ksize, ksize)`` array whose entries sum to one.

    Raises:
        ValueError: if the sampled kernel size is even.

    """
    ksize = random.randrange(self.blur_limit[0], self.blur_limit[1] + 1, 2)
    if ksize % 2 == 0:
        raise ValueError(f"Kernel size must be odd. Got: {ksize}")

    cutoff = random.uniform(*self.cutoff)

    def sinc_2d(x, y):
        # Distance of every grid point from the kernel centre.
        radius = np.sqrt((x - (ksize - 1) / 2) ** 2 + (y - (ksize - 1) / 2) ** 2)
        return cutoff * special.j1(cutoff * radius) / (2 * np.pi * radius)

    # From dsp.stackexchange.com/questions/58301/2-d-circularly-symmetric-low-pass-filter
    # The centre element evaluates to 0 / 0, hence the suppressed warnings.
    with np.errstate(divide="ignore", invalid="ignore"):
        kernel = np.fromfunction(sinc_2d, [ksize, ksize])
    mid = (ksize - 1) // 2
    kernel[mid, mid] = cutoff**2 / (4 * np.pi)

    # Normalize kernel
    total = np.sum(kernel)
    kernel = kernel.astype(np.float32) / total

    return {"kernel": kernel}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str]:
    """Return the names of the ``__init__`` arguments of this transform."""
    init_arg_names = ("blur_limit", "cutoff")
    return init_arg_names

class Sharpen (alpha=(0.2, 0.5), lightness=(0.5, 1.0), always_apply=False, p=0.5) [view source on GitHub]

Sharpen the input image and overlays the result with the original image.

Parameters:

Name Type Description
alpha Tuple[float, float]

range to choose the visibility of the sharpened image. At 0, only the original image is visible, at 1.0 only its sharpened version is visible. Default: (0.2, 0.5).

lightness Tuple[float, float]

range to choose the lightness of the sharpened image. Default: (0.5, 1.0).

p float

probability of applying the transform. Default: 0.5.

Targets

image

Source code in albumentations/augmentations/transforms.py
Python
class Sharpen(ImageOnlyTransform):
    """Sharpen the input image and blend the sharpened result with the original image.

    Args:
        alpha: range from which the visibility of the sharpened image is drawn. At 0 only the
            original image is visible, at 1.0 only the sharpened version. Default: (0.2, 0.5).
        lightness: range from which the lightness of the sharpened image is drawn.
            Default: (0.5, 1.0).
        p: probability of applying the transform. Default: 0.5.

    Targets:
        image

    """

    class InitSchema(BaseTransformInitSchema):
        alpha: ZeroOneRangeType = (0.2, 0.5)
        lightness: NonNegativeFloatRangeType = (0.5, 1.0)

    def __init__(
        self,
        alpha: Tuple[float, float] = (0.2, 0.5),
        lightness: Tuple[float, float] = (0.5, 1.0),
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        self.alpha = alpha
        self.lightness = lightness

    @staticmethod
    def __generate_sharpening_matrix(alpha_sample: np.ndarray, lightness_sample: np.ndarray) -> np.ndarray:
        """Blend a pass-through 3x3 kernel with a sharpening 3x3 kernel using ``alpha_sample``."""
        # Kernel that leaves the image unchanged.
        identity_kernel = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]], dtype=np.float32)
        # Sharpening kernel; only its centre weight depends on the sampled lightness.
        center_weight = 8 + lightness_sample
        effect_kernel = np.array(
            [[-1, -1, -1], [-1, center_weight, -1], [-1, -1, -1]],
            dtype=np.float32,
        )

        return (1 - alpha_sample) * identity_kernel + alpha_sample * effect_kernel

    def get_params(self) -> Dict[str, np.ndarray]:
        """Draw alpha and lightness and return the resulting kernel as ``sharpening_matrix``."""
        alpha_value = random.uniform(*self.alpha)
        lightness_value = random.uniform(*self.lightness)
        return {
            "sharpening_matrix": self.__generate_sharpening_matrix(
                alpha_sample=alpha_value,
                lightness_sample=lightness_value,
            ),
        }

    def apply(self, img: np.ndarray, sharpening_matrix: Optional[np.ndarray] = None, **params: Any) -> np.ndarray:
        """Convolve ``img`` with ``sharpening_matrix`` via ``F.convolve``."""
        sharpened = F.convolve(img, sharpening_matrix)
        return sharpened

    def get_transform_init_args_names(self) -> Tuple[str, str]:
        """Return the names of the ``__init__`` arguments of this transform."""
        return ("alpha", "lightness")

apply (self, img, sharpening_matrix=None, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, sharpening_matrix: Optional[np.ndarray] = None, **params: Any) -> np.ndarray:
    """Convolve ``img`` with ``sharpening_matrix`` via ``F.convolve``.

    Any extra ``params`` are accepted and not used.
    """
    sharpened = F.convolve(img, sharpening_matrix)
    return sharpened

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, np.ndarray]:
    """Draw alpha and lightness and return the resulting kernel as ``sharpening_matrix``."""
    # Alpha is drawn before lightness so the random stream is consumed in the same order.
    alpha_value = random.uniform(*self.alpha)
    lightness_value = random.uniform(*self.lightness)
    return {
        "sharpening_matrix": self.__generate_sharpening_matrix(
            alpha_sample=alpha_value,
            lightness_sample=lightness_value,
        ),
    }

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str]:
    """Return the names of the ``__init__`` arguments of this transform."""
    init_arg_names = ("alpha", "lightness")
    return init_arg_names

class Solarize (threshold=(128, 128), always_apply=False, p=0.5) [view source on GitHub]

Invert all pixel values above a threshold.

Parameters:

Name Type Description
threshold Union[float, Tuple[float, float], int, Tuple[int, int]]

range for solarizing threshold. If threshold is a single value, the range will be [1, threshold]. Default: (128, 128).

p float

probability of applying the transform. Default: 0.5.

Targets

image

Image types: any

Source code in albumentations/augmentations/transforms.py
Python
class Solarize(ImageOnlyTransform):
    """Invert all pixel values above a threshold.

    Args:
        threshold: range from which the solarizing threshold is drawn.
            If threshold is a single value, the range will be [1, threshold]. Default: (128, 128).
        p: probability of applying the transform. Default: 0.5.

    Targets:
        image

    Image types:
        any

    """

    class InitSchema(BaseTransformInitSchema):
        threshold: OnePlusFloatRangeType = (128, 128)

    def __init__(self, threshold: ScaleType = (128, 128), always_apply: bool = False, p: float = 0.5):
        super().__init__(always_apply=always_apply, p=p)
        self.threshold = cast(Tuple[float, float], threshold)

    def apply(self, img: np.ndarray, threshold: int = 0, **params: Any) -> np.ndarray:
        """Solarize ``img`` at ``threshold`` via ``F.solarize``."""
        solarized = F.solarize(img, threshold)
        return solarized

    def get_params(self) -> Dict[str, float]:
        """Draw the threshold uniformly between the two stored bounds."""
        lower = self.threshold[0]
        upper = self.threshold[1]
        return {"threshold": random.uniform(lower, upper)}

    def get_transform_init_args_names(self) -> Tuple[str]:
        """Return the names of the ``__init__`` arguments of this transform."""
        return ("threshold",)

apply (self, img, threshold=0, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, threshold: int = 0, **params: Any) -> np.ndarray:
    """Solarize ``img`` at ``threshold`` via ``F.solarize``.

    Any extra ``params`` are accepted and not used.
    """
    solarized = F.solarize(img, threshold)
    return solarized

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, float]:
    """Draw the threshold uniformly between the two stored bounds."""
    lower = self.threshold[0]
    upper = self.threshold[1]
    sampled = random.uniform(lower, upper)
    return {"threshold": sampled}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str]:
    """Return the names of the ``__init__`` arguments of this transform."""
    init_arg_names = ("threshold",)
    return init_arg_names

class Spatter (mean=(0.65, 0.65), std=(0.3, 0.3), gauss_sigma=(2, 2), cutout_threshold=(0.68, 0.68), intensity=(0.6, 0.6), mode='rain', color=None, always_apply=False, p=0.5) [view source on GitHub]

Apply spatter transform. It simulates corruption which can occlude a lens in the form of rain or mud.

Parameters:

Name Type Description
mean float, or tuple of floats

Mean value of normal distribution for generating liquid layer. If single float mean will be sampled from (0, mean) If tuple of float mean will be sampled from range (mean[0], mean[1]). If you want constant value use (mean, mean). Default (0.65, 0.65)

std float, or tuple of floats

Standard deviation value of normal distribution for generating liquid layer. If single float the number will be sampled from (0, std). If tuple of float std will be sampled from range (std[0], std[1]). If you want constant value use (std, std). Default: (0.3, 0.3).

gauss_sigma float, or tuple of floats

Sigma value for gaussian filtering of liquid layer. If single float the number will be sampled from (0, gauss_sigma). If tuple of float gauss_sigma will be sampled from range (gauss_sigma[0], gauss_sigma[1]). If you want constant value use (gauss_sigma, gauss_sigma). Default: (2, 2).

cutout_threshold float, or tuple of floats

Threshold for filtering liquid layer (determines number of drops). If single float it will be used as cutout_threshold. If single float the number will be sampled from (0, cutout_threshold). If tuple of float cutout_threshold will be sampled from range (cutout_threshold[0], cutout_threshold[1]). If you want constant value use (cutout_threshold, cutout_threshold). Default: (0.68, 0.68).

intensity float, or tuple of floats

Intensity of corruption. If single float the number will be sampled from (0, intensity). If tuple of float intensity will be sampled from range (intensity[0], intensity[1]). If you want constant value use (intensity, intensity). Default: (0.6, 0.6).

mode string, or list of strings

Type of corruption. Currently, supported options are 'rain' and 'mud'. If a list is provided, the type of corruption will be sampled from the list. Default: ("rain").

color list of (r, g, b) or dict or None

Corruption elements color. If list uses provided list as color for specified mode. If dict uses provided color for specified mode. Color for each specified mode should be provided in dict. If None uses default colors (rain: (238, 238, 175), mud: (20, 42, 63)).

p float

probability of applying the transform. Default: 0.5.

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class Spatter(ImageOnlyTransform):
    """Apply spatter transform. It simulates corruption which can occlude a lens in the form of rain or mud.

    Args:
        mean (float, or tuple of floats): Mean value of normal distribution for generating liquid layer.
            If single float mean will be sampled from `(0, mean)`
            If tuple of float mean will be sampled from range `(mean[0], mean[1])`.
            If you want constant value use (mean, mean).
            Default (0.65, 0.65)
        std (float, or tuple of floats): Standard deviation value of normal distribution for generating liquid layer.
            If single float the number will be sampled from `(0, std)`.
            If tuple of float std will be sampled from range `(std[0], std[1])`.
            If you want constant value use (std, std).
            Default: (0.3, 0.3).
        gauss_sigma (float, or tuple of floats): Sigma value for gaussian filtering of liquid layer.
            If single float the number will be sampled from `(0, gauss_sigma)`.
            If tuple of float gauss_sigma will be sampled from range `(gauss_sigma[0], gauss_sigma[1])`.
            If you want constant value use (gauss_sigma, gauss_sigma).
            Default: (2, 2).
        cutout_threshold (float, or tuple of floats): Threshold for filtering liquid layer
            (determines number of drops). If single float it will be used as cutout_threshold.
            If single float the number will be sampled from `(0, cutout_threshold)`.
            If tuple of float cutout_threshold will be sampled from range `(cutout_threshold[0], cutout_threshold[1])`.
            If you want constant value use `(cutout_threshold, cutout_threshold)`.
            Default: (0.68, 0.68).
        intensity (float, or tuple of floats): Intensity of corruption.
            If single float the number will be sampled from `(0, intensity)`.
            If tuple of float intensity will be sampled from range `(intensity[0], intensity[1])`.
            If you want constant value use `(intensity, intensity)`.
            Default: (0.6, 0.6).
        mode (string, or list of strings): Type of corruption. Currently, supported options are 'rain' and 'mud'.
             If a list is provided, the type of corruption will be sampled from the list. Default: ("rain").
        color (list of (r, g, b) or dict or None): Corruption elements color.
            If list uses provided list as color for specified mode.
            If dict uses provided color for specified mode. Color for each specified mode should be provided in dict.
            If None uses default colors (rain: (238, 238, 175), mud: (20, 42, 63)).
        p (float): probability of applying the transform. Default: 0.5.

    Targets:
        image

    Image types:
        uint8, float32

    Reference:
        https://arxiv.org/abs/1903.12261
        https://github.com/hendrycks/robustness/blob/master/ImageNet-C/create_c/make_imagenet_c.py

    """

    class InitSchema(BaseTransformInitSchema):
        mean: ZeroOneRangeType = (0.65, 0.65)
        std: ZeroOneRangeType = (0.3, 0.3)
        gauss_sigma: NonNegativeFloatRangeType = (2, 2)
        cutout_threshold: ZeroOneRangeType = (0.68, 0.68)
        intensity: ZeroOneRangeType = (0.6, 0.6)
        mode: Union[SpatterMode, Sequence[SpatterMode]] = Field(
            default="rain",
            description="Type of corruption ('rain', 'mud').",
        )
        color: Optional[Union[Sequence[int], Dict[str, Sequence[int]]]] = None

        @field_validator("mode")
        @classmethod
        def check_mode(cls, mode: Union[SpatterMode, Sequence[SpatterMode]]) -> Sequence[SpatterMode]:
            """Wrap a single mode string in a list so ``mode`` is always a sequence."""
            if isinstance(mode, str):
                return [mode]
            return mode

        @model_validator(mode="after")
        def check_color(self) -> Self:
            """Normalize ``color`` to a dict keyed by mode and validate its entries.

            Raises:
                ValueError: if a colour does not have ``NUM_RGB_CHANNELS`` components, if a dict
                    lacks an entry for one of the selected modes, or if ``color`` is neither
                    ``None``, a single colour for a single mode, nor a dict.

            """
            if self.color is None:
                self.color = {"rain": [238, 238, 175], "mud": [20, 42, 63]}

            elif isinstance(self.color, (list, tuple)) and len(self.mode) == 1:
                if len(self.color) != NUM_RGB_CHANNELS:
                    msg = "Color must be a list of three integers for RGB format."
                    raise ValueError(msg)
                self.color = {self.mode[0]: self.color}
            elif isinstance(self.color, dict):
                # Validation only: the dict is kept as given, including keys for unused modes.
                for mode in self.mode:
                    if mode not in self.color:
                        raise ValueError(f"Color for mode {mode} is not specified.")
                    if len(self.color[mode]) != NUM_RGB_CHANNELS:
                        raise ValueError(f"Color for mode {mode} must be in RGB format.")
            else:
                msg = "Color must be a list of RGB values or a dict mapping mode to RGB values."
                raise ValueError(msg)
            return self

    def __init__(
        self,
        mean: ScaleFloatType = (0.65, 0.65),
        std: ScaleFloatType = (0.3, 0.3),
        gauss_sigma: ScaleFloatType = (2, 2),
        cutout_threshold: ScaleFloatType = (0.68, 0.68),
        intensity: ScaleFloatType = (0.6, 0.6),
        mode: Union[SpatterMode, Sequence[SpatterMode]] = "rain",
        color: Optional[Union[Sequence[int], Dict[str, Sequence[int]]]] = None,
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        self.mean = cast(Tuple[float, float], mean)
        self.std = cast(Tuple[float, float], std)
        self.gauss_sigma = cast(Tuple[float, float], gauss_sigma)
        self.cutout_threshold = cast(Tuple[float, float], cutout_threshold)
        self.intensity = cast(Tuple[float, float], intensity)
        self.mode = mode
        self.color = cast(Dict[str, Sequence[int]], color)

    def apply(
        self,
        img: np.ndarray,
        non_mud: Optional[np.ndarray] = None,
        mud: Optional[np.ndarray] = None,
        drops: Optional[np.ndarray] = None,
        mode: SpatterMode = "mud",
        **params: Dict[str, Any],
    ) -> np.ndarray:
        """Pass the layers produced by ``get_params_dependent_on_targets`` to ``F.spatter``."""
        return F.spatter(img, non_mud, mud, drops, mode)

    @property
    def targets_as_params(self) -> List[str]:
        """Targets needed for parameter generation: the image, for its height and width."""
        return ["image"]

    def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Generate the rain or mud layers for an image of the given size.

        Args:
            params: dict holding the input under the ``image`` key; only its first two
                shape entries (height, width) are read.

        Returns:
            Dict with keys ``non_mud``, ``mud``, ``drops`` and ``mode``. For ``"rain"`` only
            ``drops`` is set; otherwise ``mud`` and ``non_mud`` are set and ``drops`` is ``None``.

        """
        height, width = params["image"].shape[:2]

        mean = random.uniform(self.mean[0], self.mean[1])
        std = random.uniform(self.std[0], self.std[1])
        cutout_threshold = random.uniform(self.cutout_threshold[0], self.cutout_threshold[1])
        sigma = random.uniform(self.gauss_sigma[0], self.gauss_sigma[1])
        mode = random.choice(self.mode)
        intensity = random.uniform(self.intensity[0], self.intensity[1])
        color = np.array(self.color[mode]) / 255.0

        # Smoothed random field; everything below the threshold is cut away.
        liquid_layer = random_utils.normal(size=(height, width), loc=mean, scale=std)
        liquid_layer = gaussian_filter(liquid_layer, sigma=sigma, mode="nearest")
        liquid_layer[liquid_layer < cutout_threshold] = 0

        if mode == "rain":
            liquid_layer = (liquid_layer * 255).astype(np.uint8)
            dist = 255 - cv2.Canny(liquid_layer, 50, 150)
            dist = cv2.distanceTransform(dist, cv2.DIST_L2, 5)
            _, dist = cv2.threshold(dist, 20, 20, cv2.THRESH_TRUNC)
            dist = blur(dist, 3).astype(np.uint8)
            dist = F.equalize(dist)

            ker = np.array([[-2, -1, 0], [-1, 1, 1], [0, 1, 2]])
            dist = F.convolve(dist, ker)
            dist = blur(dist, 3).astype(np.float32)

            m = liquid_layer * dist
            # Scale to a peak of 1. When nothing survives the cutout threshold the peak is 0;
            # dividing by it would turn the whole layer into NaN, so the layer stays all-zero.
            peak = np.max(m, axis=(0, 1))
            if peak != 0:
                m *= 1 / peak

            drops = m[:, :, None] * color * intensity
            mud = None
            non_mud = None
        else:
            m = np.where(liquid_layer > cutout_threshold, 1, 0)
            m = gaussian_filter(m.astype(np.float32), sigma=sigma, mode="nearest")
            m[m < 1.2 * cutout_threshold] = 0
            m = m[..., np.newaxis]

            mud = m * color
            non_mud = 1 - m
            drops = None

        return {
            "non_mud": non_mud,
            "mud": mud,
            "drops": drops,
            "mode": mode,
        }

    def get_transform_init_args_names(self) -> Tuple[str, str, str, str, str, str, str]:
        """Return the names of the ``__init__`` arguments of this transform."""
        return "mean", "std", "gauss_sigma", "intensity", "cutout_threshold", "mode", "color"

targets_as_params: List[str] property readonly

Targets used to get params

apply (self, img, non_mud=None, mud=None, drops=None, mode='mud', **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(
    self,
    img: np.ndarray,
    non_mud: Optional[np.ndarray] = None,
    mud: Optional[np.ndarray] = None,
    drops: Optional[np.ndarray] = None,
    mode: SpatterMode = "mud",
    **params: Dict[str, Any],
) -> np.ndarray:
    """Pass the precomputed layers and the sampled mode to ``F.spatter``.

    Any extra ``params`` are accepted and not used.
    """
    spattered = F.spatter(img, non_mud, mud, drops, mode)
    return spattered

get_params_dependent_on_targets (self, params)

Returns parameters dependent on targets. Dependent target is defined in self.targets_as_params

Source code in albumentations/augmentations/transforms.py
Python
def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
    """Generate the rain or mud layers for an image of the given size.

    Args:
        params: dict holding the input under the ``image`` key; only its first two
            shape entries (height, width) are read.

    Returns:
        Dict with keys ``non_mud``, ``mud``, ``drops`` and ``mode``. For ``"rain"`` only
        ``drops`` is set; otherwise ``mud`` and ``non_mud`` are set and ``drops`` is ``None``.

    """
    height, width = params["image"].shape[:2]

    mean = random.uniform(self.mean[0], self.mean[1])
    std = random.uniform(self.std[0], self.std[1])
    cutout_threshold = random.uniform(self.cutout_threshold[0], self.cutout_threshold[1])
    sigma = random.uniform(self.gauss_sigma[0], self.gauss_sigma[1])
    mode = random.choice(self.mode)
    intensity = random.uniform(self.intensity[0], self.intensity[1])
    color = np.array(self.color[mode]) / 255.0

    # Smoothed random field; everything below the threshold is cut away.
    liquid_layer = random_utils.normal(size=(height, width), loc=mean, scale=std)
    liquid_layer = gaussian_filter(liquid_layer, sigma=sigma, mode="nearest")
    liquid_layer[liquid_layer < cutout_threshold] = 0

    if mode == "rain":
        liquid_layer = (liquid_layer * 255).astype(np.uint8)
        dist = 255 - cv2.Canny(liquid_layer, 50, 150)
        dist = cv2.distanceTransform(dist, cv2.DIST_L2, 5)
        _, dist = cv2.threshold(dist, 20, 20, cv2.THRESH_TRUNC)
        dist = blur(dist, 3).astype(np.uint8)
        dist = F.equalize(dist)

        ker = np.array([[-2, -1, 0], [-1, 1, 1], [0, 1, 2]])
        dist = F.convolve(dist, ker)
        dist = blur(dist, 3).astype(np.float32)

        m = liquid_layer * dist
        # Scale to a peak of 1. When nothing survives the cutout threshold the peak is 0;
        # dividing by it would turn the whole layer into NaN, so the layer stays all-zero.
        peak = np.max(m, axis=(0, 1))
        if peak != 0:
            m *= 1 / peak

        drops = m[:, :, None] * color * intensity
        mud = None
        non_mud = None
    else:
        m = np.where(liquid_layer > cutout_threshold, 1, 0)
        m = gaussian_filter(m.astype(np.float32), sigma=sigma, mode="nearest")
        m[m < 1.2 * cutout_threshold] = 0
        m = m[..., np.newaxis]

        mud = m * color
        non_mud = 1 - m
        drops = None

    return {
        "non_mud": non_mud,
        "mud": mud,
        "drops": drops,
        "mode": mode,
    }

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str, str, str, str, str, str]:
    """Return the names of the ``__init__`` arguments of this transform."""
    init_arg_names = (
        "mean",
        "std",
        "gauss_sigma",
        "intensity",
        "cutout_threshold",
        "mode",
        "color",
    )
    return init_arg_names

class Superpixels (p_replace=(0, 0.1), n_segments=(100, 100), max_size=128, interpolation=1, always_apply=False, p=0.5) [view source on GitHub]

Transform images partially/completely to their superpixel representation. This implementation uses skimage's version of the SLIC algorithm.

Parameters:

Name Type Description
p_replace float or tuple of float

Defines for any segment the probability that the pixels within that segment are replaced by their average color (otherwise, the pixels are not changed).

Examples:

  • A probability of 0.0 would mean, that the pixels in no segment are replaced by their average color (image is not changed at all).
  • A probability of 0.5 would mean, that around half of all segments are replaced by their average color.
  • A probability of 1.0 would mean, that all segments are replaced by their average color (resulting in a voronoi image).
    Behaviour based on chosen data types for this parameter:
        * If a ``float``, then that ``float`` will always be used.
        * If ``tuple`` ``(a, b)``, then a random probability will be
          sampled from the interval ``[a, b]`` per image.
n_segments (tuple of int): Rough target number of how many superpixels to generate (the algorithm
    may deviate from this number). Lower value will lead to coarser superpixels.
    Higher values are computationally more intensive and will hence lead to a slowdown
    Then a value from the discrete interval ``[a..b]`` will be sampled per image.
    If input is a single integer, the range will be ``(1, n_segments)``.
    If interested in a fixed number of segments, use ``(n_segments, n_segments)``.
max_size (int or None): Maximum image size at which the augmentation is performed.
    If the width or height of an image exceeds this value, it will be
    downscaled before the augmentation so that the longest side matches `max_size`.
    This is done to speed up the process. The final output image has the same size as the input image.
    Note that in case `p_replace` is below ``1.0``,
    the down-/upscaling will affect the not-replaced pixels too.
    Use ``None`` to apply no down-/upscaling.
interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
    cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
    Default: cv2.INTER_LINEAR.
p (float): probability of applying the transform. Default: 0.5.

Targets

image

Source code in albumentations/augmentations/transforms.py
Python
class Superpixels(ImageOnlyTransform):
    """Transform images partially/completely to their superpixel representation.
    This implementation uses skimage's version of the SLIC algorithm.

    Args:
        p_replace (float or tuple of float): Defines for any segment the probability that the pixels within that
            segment are replaced by their average color (otherwise, the pixels are not changed).
            Examples:
                * A probability of ``0.0`` would mean, that the pixels in no
                  segment are replaced by their average color (image is not
                  changed at all).
                * A probability of ``0.5`` would mean, that around half of all
                  segments are replaced by their average color.
                * A probability of ``1.0`` would mean, that all segments are
                  replaced by their average color (resulting in a voronoi
                  image).
            Behaviour based on chosen data types for this parameter:
                * If a ``float``, then that ``float`` will always be used.
                * If ``tuple`` ``(a, b)``, then a random probability will be
                  sampled from the interval ``[a, b]`` per image.
        n_segments (tuple of int): Rough target number of how many superpixels to generate (the algorithm
            may deviate from this number). Lower value will lead to coarser superpixels.
            Higher values are computationally more intensive and will hence lead to a slowdown
            Then a value from the discrete interval ``[a..b]`` will be sampled per image.
            If input is a single integer, the range will be ``(1, n_segments)``.
            If interested in a fixed number of segments, use ``(n_segments, n_segments)``.
        max_size (int or None): Maximum image size at which the augmentation is performed.
            If the width or height of an image exceeds this value, it will be
            downscaled before the augmentation so that the longest side matches `max_size`.
            This is done to speed up the process. The final output image has the same size as the input image.
            Note that in case `p_replace` is below ``1.0``,
            the down-/upscaling will affect the not-replaced pixels too.
            Use ``None`` to apply no down-/upscaling.
        interpolation (OpenCV flag): flag that is used to specify the interpolation algorithm. Should be one of:
            cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_LANCZOS4.
            Default: cv2.INTER_LINEAR.
        p (float): probability of applying the transform. Default: 0.5.

    Targets:
        image

    """

    class InitSchema(BaseTransformInitSchema):
        p_replace: ZeroOneRangeType = (0, 0.1)
        n_segments: OnePlusIntRangeType = (100, 100)
        max_size: Optional[int] = Field(default=128, ge=1, description="Maximum image size for the transformation.")
        interpolation: InterpolationType = cv2.INTER_LINEAR

    def __init__(
        self,
        p_replace: ScaleFloatType = (0, 0.1),
        n_segments: ScaleIntType = (100, 100),
        max_size: Optional[int] = 128,
        interpolation: int = cv2.INTER_LINEAR,
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        self.p_replace = cast(Tuple[float, float], p_replace)
        self.n_segments = cast(Tuple[int, int], n_segments)
        self.max_size = max_size
        self.interpolation = interpolation

    def get_transform_init_args_names(self) -> Tuple[str, str, str, str]:
        """Return the names of the ``__init__`` arguments of this transform."""
        return ("p_replace", "n_segments", "max_size", "interpolation")

    def get_params(self) -> Dict[str, Any]:
        """Sample the segment count and the per-segment replacement flags.

        Returns:
            Dict with ``replace_samples`` (one boolean per sampled segment, true where the
            random draw fell below the sampled probability) and ``n_segments``.

        """
        # Draw order (count, probability, per-segment values) is kept unchanged.
        segment_count = random_utils.randint(self.n_segments[0], self.n_segments[1] + 1)
        replace_probability = random.uniform(*self.p_replace)
        replace_mask = random_utils.random(segment_count) < replace_probability
        return {"replace_samples": replace_mask, "n_segments": segment_count}

    def apply(
        self,
        img: np.ndarray,
        replace_samples: Sequence[bool] = (False,),
        n_segments: int = 1,
        **kwargs: Any,
    ) -> np.ndarray:
        """Run ``F.superpixels`` with the sampled values and the stored size and interpolation."""
        max_size = self.max_size
        interpolation = cast(int, self.interpolation)
        return F.superpixels(img, n_segments, replace_samples, max_size, interpolation)

apply (self, img, replace_samples=(False,), n_segments=1, **kwargs)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(
    self,
    img: np.ndarray,
    replace_samples: Sequence[bool] = (False,),
    n_segments: int = 1,
    **kwargs: Any,
) -> np.ndarray:
    """Run ``F.superpixels`` with the sampled values and the stored size and interpolation.

    Any extra ``kwargs`` are accepted and not used.
    """
    max_size = self.max_size
    interpolation = cast(int, self.interpolation)
    return F.superpixels(img, n_segments, replace_samples, max_size, interpolation)

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, Any]:
    """Sample the segment count and the per-segment replacement flags.

    Returns:
        Dict with ``replace_samples`` (one boolean per sampled segment, true where the
        random draw fell below the sampled probability) and ``n_segments``.

    """
    # Draw order (count, probability, per-segment values) is kept unchanged.
    segment_count = random_utils.randint(self.n_segments[0], self.n_segments[1] + 1)
    replace_probability = random.uniform(*self.p_replace)
    replace_mask = random_utils.random(segment_count) < replace_probability
    return {"replace_samples": replace_mask, "n_segments": segment_count}

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str, str, str]:
    """Return the names of the constructor arguments stored on this transform."""
    init_arg_names = ("p_replace", "n_segments", "max_size", "interpolation")
    return init_arg_names

class TemplateTransform (templates, img_weight=(0.5, 0.5), template_weight=(0.5, 0.5), template_transform=None, name=None, always_apply=False, p=0.5) [view source on GitHub]

Apply blending of input image with specified templates

Parameters:

Name Type Description
templates numpy array or list of numpy arrays

Images as template for transform.

img_weight Union[float, Tuple[float, float]]

If a single float is given, the weight will be sampled from (0, img_weight). If a tuple of floats is given, the weight will be sampled from the range [img_weight[0], img_weight[1]). If you want a fixed weight, use (img_weight, img_weight). Default: (0.5, 0.5).

template_weight Union[float, Tuple[float, float]]

If a single float is given, the weight will be sampled from (0, template_weight). If a tuple of floats is given, the weight will be sampled from the range [template_weight[0], template_weight[1]). If you want a fixed weight, use (template_weight, template_weight). Default: (0.5, 0.5).

template_transform Optional[Callable[..., Any]]

transformation object which could be applied to template, must produce template the same size as input image.

name Optional[str]

(Optional) Name of transform, used only for deserialization.

p float

probability of applying the transform. Default: 0.5.

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class TemplateTransform(ImageOnlyTransform):
    """Blend the input image with a template picked at random from `templates`.

    Args:
        templates (numpy array or list of numpy arrays): Images as template for transform.
        img_weight: If a single float, the weight will be sampled from (0, img_weight).
            If a tuple of floats, the weight will be in range `[img_weight[0], img_weight[1])`.
            If you want a fixed weight, use (img_weight, img_weight).
            Default: (0.5, 0.5).
        template_weight: If a single float, the weight will be sampled from (0, template_weight).
            If a tuple of floats, the weight will be in range `[template_weight[0], template_weight[1])`.
            If you want a fixed weight, use (template_weight, template_weight).
            Default: (0.5, 0.5).
        template_transform: transformation object which could be applied to template,
            must produce template the same size as input image.
        name: (Optional) Name of transform, used only for deserialization.
        p: probability of applying the transform. Default: 0.5.

    Targets:
        image

    Image types:
        uint8, float32

    """

    class InitSchema(BaseTransformInitSchema):
        templates: Union[np.ndarray, Sequence[np.ndarray]] = Field(..., description="Images as template for transform.")
        img_weight: ZeroOneRangeType = (0.5, 0.5)
        template_weight: ZeroOneRangeType = (0.5, 0.5)
        template_transform: Optional[Callable[..., Any]] = Field(
            default=None,
            description="Transformation object applied to template.",
        )
        name: Optional[str] = Field(default=None, description="Name of transform, used only for deserialization.")

        @field_validator("templates")
        @classmethod
        def validate_templates(cls, v: Union[np.ndarray, List[np.ndarray]]) -> List[np.ndarray]:
            """Normalize `templates` to a list of arrays, rejecting anything else."""
            # A lone array is wrapped so the result is always a list.
            if isinstance(v, np.ndarray):
                return [v]
            if not isinstance(v, list):
                msg = "Templates must be a numpy array or a list of numpy arrays."
                raise TypeError(msg)
            if any(not isinstance(item, np.ndarray) for item in v):
                msg = "All templates must be numpy arrays."
                raise ValueError(msg)
            return v

    def __init__(
        self,
        templates: Union[np.ndarray, List[np.ndarray]],
        img_weight: ScaleFloatType = (0.5, 0.5),
        template_weight: ScaleFloatType = (0.5, 0.5),
        template_transform: Optional[Callable[..., Any]] = None,
        name: Optional[str] = None,
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        self.name = name
        self.templates = templates
        self.template_transform = template_transform
        # `cast` only informs the type checker; the values are stored unchanged.
        self.img_weight = cast(Tuple[float, float], img_weight)
        self.template_weight = cast(Tuple[float, float], template_weight)

    def apply(
        self,
        img: np.ndarray,
        template: Optional[np.ndarray] = None,
        img_weight: float = 0.5,
        template_weight: float = 0.5,
        **params: Any,
    ) -> np.ndarray:
        """Combine `img` and `template` via `F.add_weighted` using the given weights."""
        blended = F.add_weighted(img, img_weight, template, template_weight)
        return blended

    def get_params(self) -> Dict[str, float]:
        """Draw the image and template weights uniformly from their configured ranges."""
        sampled_img_weight = random.uniform(self.img_weight[0], self.img_weight[1])
        sampled_template_weight = random.uniform(self.template_weight[0], self.template_weight[1])
        return {"img_weight": sampled_img_weight, "template_weight": sampled_template_weight}

    def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Pick a template and make it shape-compatible with the input image.

        Args:
            params: Mapping that contains the input image under the key "image".

        Returns:
            A dict with the prepared template under the key "template".

        Raises:
            ValueError: If the template's channel count is neither 1 nor the image's,
                if the dtypes differ, or if the first two shape dimensions differ.
        """
        image = params["image"]
        chosen = random.choice(self.templates)

        if self.template_transform is not None:
            chosen = self.template_transform(image=chosen)["image"]

        template_channels = get_num_channels(chosen)
        image_channels = get_num_channels(image)

        if template_channels != 1 and template_channels != image_channels:
            msg = (
                "Template must be a single channel or "
                "has the same number of channels as input "
                f"image ({image_channels}), got {template_channels}"
            )
            raise ValueError(msg)

        if chosen.dtype != image.dtype:
            msg = "Image and template must be the same image type"
            raise ValueError(msg)

        image_size, template_size = image.shape[:2], chosen.shape[:2]
        if image_size != template_size:
            msg = f"Image and template must be the same size, got {image_size} and {template_size}"
            raise ValueError(msg)

        # Repeat a single-channel template along a new last axis to match the image.
        if template_channels == 1 and image_channels > 1:
            chosen = np.stack((chosen,) * image_channels, axis=-1)

        # in order to support grayscale image with dummy dim
        chosen = chosen.reshape(image.shape)

        return {"template": chosen}

    @classmethod
    def is_serializable(cls) -> bool:
        return False

    @property
    def targets_as_params(self) -> List[str]:
        """Targets used to get params."""
        return ["image"]

    def to_dict_private(self) -> Dict[str, Any]:
        """Return a dict with the class full name and the user-supplied `name`.

        Raises:
            ValueError: If no `name` was given at construction time.
        """
        if self.name is not None:
            return {"__class_fullname__": self.get_class_fullname(), "__name__": self.name}
        msg = (
            "To make a TemplateTransform serializable you should provide the `name` argument, "
            "e.g. `TemplateTransform(name='my_transform', ...)`."
        )
        raise ValueError(msg)

targets_as_params: List[str] property readonly

Targets used to get params

apply (self, img, template=None, img_weight=0.5, template_weight=0.5, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(
    self,
    img: np.ndarray,
    template: Optional[np.ndarray] = None,
    img_weight: float = 0.5,
    template_weight: float = 0.5,
    **params: Any,
) -> np.ndarray:
    """Combine `img` and `template` via `F.add_weighted` using the given weights."""
    blended = F.add_weighted(img, img_weight, template, template_weight)
    return blended

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, float]:
    """Draw the image and template weights uniformly from their configured ranges."""
    sampled_img_weight = random.uniform(self.img_weight[0], self.img_weight[1])
    sampled_template_weight = random.uniform(self.template_weight[0], self.template_weight[1])
    return {"img_weight": sampled_img_weight, "template_weight": sampled_template_weight}

get_params_dependent_on_targets (self, params)

Returns parameters dependent on targets. Dependent target is defined in self.targets_as_params

Source code in albumentations/augmentations/transforms.py
Python
def get_params_dependent_on_targets(self, params: Dict[str, Any]) -> Dict[str, Any]:
    """Pick a template and make it shape-compatible with the input image.

    Args:
        params: Mapping that contains the input image under the key "image".

    Returns:
        A dict with the prepared template under the key "template".

    Raises:
        ValueError: If the template's channel count is neither 1 nor the image's,
            if the dtypes differ, or if the first two shape dimensions differ.
    """
    image = params["image"]
    chosen = random.choice(self.templates)

    if self.template_transform is not None:
        chosen = self.template_transform(image=chosen)["image"]

    template_channels = get_num_channels(chosen)
    image_channels = get_num_channels(image)

    if template_channels != 1 and template_channels != image_channels:
        msg = (
            "Template must be a single channel or "
            "has the same number of channels as input "
            f"image ({image_channels}), got {template_channels}"
        )
        raise ValueError(msg)

    if chosen.dtype != image.dtype:
        msg = "Image and template must be the same image type"
        raise ValueError(msg)

    image_size, template_size = image.shape[:2], chosen.shape[:2]
    if image_size != template_size:
        msg = f"Image and template must be the same size, got {image_size} and {template_size}"
        raise ValueError(msg)

    # Repeat a single-channel template along a new last axis to match the image.
    if template_channels == 1 and image_channels > 1:
        chosen = np.stack((chosen,) * image_channels, axis=-1)

    # in order to support grayscale image with dummy dim
    chosen = chosen.reshape(image.shape)

    return {"template": chosen}

class ToFloat (max_value=None, always_apply=False, p=1.0) [view source on GitHub]

Divide pixel values by max_value to get a float32 output array where all values lie in the range [0, 1.0]. If max_value is None the transform will try to infer the maximum value by inspecting the data type of the input image.

See Also: :class:~albumentations.augmentations.transforms.FromFloat

Parameters:

Name Type Description
max_value Optional[float]

maximum possible input value. Default: None.

p float

probability of applying the transform. Default: 1.0.

Targets

image

Image types: any type

Source code in albumentations/augmentations/transforms.py
Python
class ToFloat(ImageOnlyTransform):
    """Divide pixel values by `max_value` to get a float32 output array with values in the range [0, 1.0].

    If `max_value` is None the transform will try to infer the maximum value by inspecting
    the data type of the input image.

    See Also:
        :class:`~albumentations.augmentations.transforms.FromFloat`

    Args:
        max_value: maximum possible input value. Default: None.
        p: probability of applying the transform. Default: 1.0.

    Targets:
        image

    Image types:
        any type

    """

    class InitSchema(BaseTransformInitSchema):
        max_value: Optional[float] = Field(default=None, description="Maximum possible input value.")
        p: ProbabilityType = 1

    def __init__(self, max_value: Optional[float] = None, always_apply: bool = False, p: float = 1.0):
        super().__init__(always_apply=always_apply, p=p)
        self.max_value = max_value

    def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
        """Convert `img` through `F.to_float`, passing the configured `max_value`."""
        max_value = self.max_value
        return F.to_float(img, max_value)

    def get_transform_init_args_names(self) -> Tuple[str]:
        """Return the names of the constructor arguments stored on this transform."""
        init_arg_names = ("max_value",)
        return init_arg_names

apply (self, img, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
    """Convert `img` through `F.to_float`, passing the configured `max_value`."""
    max_value = self.max_value
    return F.to_float(img, max_value)

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str]:
    """Return the names of the constructor arguments stored on this transform."""
    init_arg_names = ("max_value",)
    return init_arg_names

class ToGray [view source on GitHub]

Convert the input RGB image to grayscale. If the mean pixel value for the resulting image is greater than 127, invert the resulting grayscale image.

Parameters:

Name Type Description
p

probability of applying the transform. Default: 0.5.

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class ToGray(ImageOnlyTransform):
    """Convert the input RGB image to grayscale.

    Args:
        p: probability of applying the transform. Default: 0.5.

    Targets:
        image

    Image types:
        uint8, float32

    Note:
        Earlier documentation stated that the grayscale result is inverted when its mean
        pixel value is greater than 127. Nothing in this class does that; whether
        ``F.to_gray`` does is not visible here -- TODO confirm.

    """

    def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
        """Return the grayscale conversion of `img`.

        An image that is already grayscale is returned unchanged after a warning.

        Raises:
            TypeError: If `img` is neither a grayscale nor an RGB image.
        """
        if is_grayscale_image(img):
            warnings.warn("The image is already gray.")
            return img
        if is_rgb_image(img):
            return F.to_gray(img)

        msg = "ToGray transformation expects 3-channel images."
        raise TypeError(msg)

    def get_transform_init_args_names(self) -> Tuple[()]:
        """Return an empty tuple: no constructor arguments are recorded."""
        no_init_args: Tuple[()] = ()
        return no_init_args

apply (self, img, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
    """Return the grayscale conversion of `img`.

    An image that is already grayscale is returned unchanged after a warning.

    Raises:
        TypeError: If `img` is neither a grayscale nor an RGB image.
    """
    if is_grayscale_image(img):
        warnings.warn("The image is already gray.")
        return img
    if is_rgb_image(img):
        return F.to_gray(img)

    msg = "ToGray transformation expects 3-channel images."
    raise TypeError(msg)

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[()]:
    """Return an empty tuple: no constructor arguments are recorded."""
    no_init_args: Tuple[()] = ()
    return no_init_args

class ToRGB (always_apply=True, p=1.0) [view source on GitHub]

Convert the input grayscale image to RGB.

Parameters:

Name Type Description
p float

probability of applying the transform. Default: 1.

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class ToRGB(ImageOnlyTransform):
    """Turn a grayscale input image into an RGB image.

    Args:
        p: probability of applying the transform. Default: 1.

    Targets:
        image

    Image types:
        uint8, float32

    """

    def __init__(self, always_apply: bool = True, p: float = 1.0):
        super().__init__(p=p, always_apply=always_apply)

    def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
        """Return the RGB conversion of `img`.

        An image that is already RGB is returned unchanged after a warning.

        Raises:
            TypeError: If `img` is neither an RGB nor a grayscale image.
        """
        if is_rgb_image(img):
            warnings.warn("The image is already an RGB.")
            return img
        if is_grayscale_image(img):
            return F.gray_to_rgb(img)

        msg = "ToRGB transformation expects 2-dim images or 3-dim with the last dimension equal to 1."
        raise TypeError(msg)

    def get_transform_init_args_names(self) -> Tuple[()]:
        """Return an empty tuple: no constructor arguments are recorded."""
        no_init_args: Tuple[()] = ()
        return no_init_args

__init__ (self, always_apply=True, p=1.0) special

Initialize self. See help(type(self)) for accurate signature.

Source code in albumentations/augmentations/transforms.py
Python
def __init__(self, always_apply: bool = True, p: float = 1.0):
    """Forward `always_apply` and `p` to the parent initializer."""
    super().__init__(p=p, always_apply=always_apply)

apply (self, img, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
    """Return the RGB conversion of `img`.

    An image that is already RGB is returned unchanged after a warning.

    Raises:
        TypeError: If `img` is neither an RGB nor a grayscale image.
    """
    if is_rgb_image(img):
        warnings.warn("The image is already an RGB.")
        return img
    if is_grayscale_image(img):
        return F.gray_to_rgb(img)

    msg = "ToRGB transformation expects 2-dim images or 3-dim with the last dimension equal to 1."
    raise TypeError(msg)

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[()]:
    """Return an empty tuple: no constructor arguments are recorded."""
    no_init_args: Tuple[()] = ()
    return no_init_args

class ToSepia (always_apply=False, p=0.5) [view source on GitHub]

Applies sepia filter to the input RGB image

Parameters:

Name Type Description
p float

probability of applying the transform. Default: 0.5.

Targets

image

Image types: uint8, float32

Source code in albumentations/augmentations/transforms.py
Python
class ToSepia(ImageOnlyTransform):
    """Apply a sepia filter to the input RGB image.

    Args:
        p: probability of applying the transform. Default: 0.5.

    Targets:
        image

    Image types:
        uint8, float32

    """

    def __init__(self, always_apply: bool = False, p: float = 0.5):
        super().__init__(always_apply, p)
        # 3x3 matrix handed to F.linear_transformation_rgb in `apply`.
        matrix_rows = [
            [0.393, 0.769, 0.189],
            [0.349, 0.686, 0.168],
            [0.272, 0.534, 0.131],
        ]
        self.sepia_transformation_matrix = np.array(matrix_rows)

    def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
        """Run `img` through `F.linear_transformation_rgb` with the sepia matrix.

        Raises:
            TypeError: If `img` is not an RGB image.
        """
        if is_rgb_image(img):
            return F.linear_transformation_rgb(img, self.sepia_transformation_matrix)

        msg = "ToSepia transformation expects 3-channel images."
        raise TypeError(msg)

    def get_transform_init_args_names(self) -> Tuple[()]:
        """Return an empty tuple: no constructor arguments are recorded."""
        no_init_args: Tuple[()] = ()
        return no_init_args

__init__ (self, always_apply=False, p=0.5) special

Initialize self. See help(type(self)) for accurate signature.

Source code in albumentations/augmentations/transforms.py
Python
def __init__(self, always_apply: bool = False, p: float = 0.5):
    """Initialize the parent and store the 3x3 sepia matrix on the instance."""
    super().__init__(always_apply, p)
    matrix_rows = [
        [0.393, 0.769, 0.189],
        [0.349, 0.686, 0.168],
        [0.272, 0.534, 0.131],
    ]
    self.sepia_transformation_matrix = np.array(matrix_rows)

apply (self, img, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, **params: Any) -> np.ndarray:
    """Run `img` through `F.linear_transformation_rgb` with the sepia matrix.

    Raises:
        TypeError: If `img` is not an RGB image.
    """
    if is_rgb_image(img):
        return F.linear_transformation_rgb(img, self.sepia_transformation_matrix)

    msg = "ToSepia transformation expects 3-channel images."
    raise TypeError(msg)

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[()]:
    """Return an empty tuple: no constructor arguments are recorded."""
    no_init_args: Tuple[()] = ()
    return no_init_args

class UnsharpMask (blur_limit=(3, 7), sigma_limit=0.0, alpha=(0.2, 0.5), threshold=10, always_apply=False, p=0.5) [view source on GitHub]

Sharpen the input image using Unsharp Masking processing and overlays the result with the original image.

Parameters:

Name Type Description
blur_limit Union[int, Tuple[int, int]]

maximum Gaussian kernel size for blurring the input image. Must be zero or odd and in range [0, inf). If set to 0 it will be computed from sigma as round(sigma * (3 if img.dtype == np.uint8 else 4) * 2 + 1) + 1. If set single value blur_limit will be in range (0, blur_limit). Default: (3, 7).

sigma_limit Union[float, Tuple[float, float]]

Gaussian kernel standard deviation. Must be in range [0, inf). If set single value sigma_limit will be in range (0, sigma_limit). If set to 0 sigma will be computed as sigma = 0.3*((ksize-1)*0.5 - 1) + 0.8. Default: 0.

alpha Union[float, Tuple[float, float]]

range to choose the visibility of the sharpened image. At 0, only the original image is visible, at 1.0 only its sharpened version is visible. Default: (0.2, 0.5).

threshold int

Value to limit sharpening only for areas with high pixel difference between original image and its smoothed version. Higher threshold means less sharpening on flat areas. Must be in range [0, 255]. Default: 10.

p float

probability of applying the transform. Default: 0.5.

Reference

arxiv.org/pdf/2107.10833.pdf

Targets

image

Source code in albumentations/augmentations/transforms.py
Python
class UnsharpMask(ImageOnlyTransform):
    """Sharpen the input image using Unsharp Masking processing and overlays the result with the original image.

    Args:
        blur_limit: maximum Gaussian kernel size for blurring the input image.
            Must be zero or odd and in range [0, inf). If set to 0 it will be computed from sigma
            as `round(sigma * (3 if img.dtype == np.uint8 else 4) * 2 + 1) + 1`.
            If set single value `blur_limit` will be in range (0, blur_limit).
            Default: (3, 7).
        sigma_limit: Gaussian kernel standard deviation. Must be in range [0, inf).
            If set single value `sigma_limit` will be in range (0, sigma_limit).
            If set to 0 sigma will be computed as `sigma = 0.3*((ksize-1)*0.5 - 1) + 0.8`. Default: 0.
        alpha: range to choose the visibility of the sharpened image.
            At 0, only the original image is visible, at 1.0 only its sharpened version is visible.
            Default: (0.2, 0.5).
        threshold: Value to limit sharpening only for areas with high pixel difference between original image
            and its smoothed version. Higher threshold means less sharpening on flat areas.
            Must be in range [0, 255]. Default: 10.
        p: probability of applying the transform. Default: 0.5.

    Reference:
        arxiv.org/pdf/2107.10833.pdf

    Targets:
        image

    """

    class InitSchema(BaseTransformInitSchema):
        sigma_limit: NonNegativeFloatRangeType = 0
        alpha: ZeroOneRangeType = (0.2, 0.5)
        threshold: int = Field(default=10, ge=0, le=255, description="Threshold for limiting sharpening.")

        blur_limit: ScaleIntType = Field(
            default=(3, 7),
            description="Maximum kernel size for blurring the input image.",
        )

        @field_validator("blur_limit")
        @classmethod
        def process_blur(cls, value: ScaleIntType, info: ValidationInfo) -> Tuple[int, int]:
            # Delegates normalization/validation of the kernel-size range to the shared helper.
            return process_blur_limit(value, info, min_value=3)

    def __init__(
        self,
        blur_limit: ScaleIntType = (3, 7),
        sigma_limit: ScaleFloatType = 0.0,
        alpha: ScaleFloatType = (0.2, 0.5),
        threshold: int = 10,
        always_apply: bool = False,
        p: float = 0.5,
    ):
        super().__init__(always_apply=always_apply, p=p)
        # `cast` only informs the type checker; the values are stored unchanged.
        self.blur_limit = cast(Tuple[int, int], blur_limit)
        self.sigma_limit = cast(Tuple[float, float], sigma_limit)
        self.alpha = cast(Tuple[float, float], alpha)
        self.threshold = threshold

    def get_params(self) -> Dict[str, Any]:
        """Sample the kernel size, sigma and alpha for one application.

        Returns:
            A dict with ``ksize`` (zero or an odd integer), ``sigma`` and ``alpha``.
        """
        ksize = random.randrange(self.blur_limit[0], self.blur_limit[1] + 1, 2)
        # Stepping by 2 from a lower bound of 0 lands on even sizes (2, 4, ...), which are
        # not valid kernel sizes per the documented "zero or odd" contract. Move such
        # draws to the next odd size; 0 is kept and draws from an odd lower bound are
        # unaffected.
        if ksize != 0 and ksize % 2 == 0:
            ksize += 1
        return {
            "ksize": ksize,
            "sigma": random.uniform(*self.sigma_limit),
            "alpha": random.uniform(*self.alpha),
        }

    def apply(self, img: np.ndarray, ksize: int = 3, sigma: float = 0, alpha: float = 0.2, **params: Any) -> np.ndarray:
        """Sharpen `img` by delegating to `F.unsharp_mask` with the sampled parameters.

        Args:
            img: Input image.
            ksize: Kernel size forwarded to `F.unsharp_mask`.
            sigma: Standard deviation forwarded to `F.unsharp_mask`.
            alpha: Visibility of the sharpened image, forwarded to `F.unsharp_mask`.
            **params: Extra parameters; ignored here.
        """
        return F.unsharp_mask(img, ksize, sigma=sigma, alpha=alpha, threshold=self.threshold)

    def get_transform_init_args_names(self) -> Tuple[str, str, str, str]:
        """Return the names of the constructor arguments stored on this transform."""
        return "blur_limit", "sigma_limit", "alpha", "threshold"

apply (self, img, ksize=3, sigma=0, alpha=0.2, **params)

Apply transform on image.

Source code in albumentations/augmentations/transforms.py
Python
def apply(self, img: np.ndarray, ksize: int = 3, sigma: float = 0, alpha: float = 0.2, **params: Any) -> np.ndarray:
    """Sharpen `img` by delegating to `F.unsharp_mask` with the sampled parameters.

    Args:
        img: Input image.
        ksize: Kernel size forwarded to `F.unsharp_mask`.
        sigma: Standard deviation forwarded to `F.unsharp_mask`. Annotated as float
            because it is drawn with `random.uniform` in `get_params`.
        alpha: Visibility of the sharpened image, forwarded to `F.unsharp_mask`.
        **params: Extra parameters; ignored here.
    """
    return F.unsharp_mask(img, ksize, sigma=sigma, alpha=alpha, threshold=self.threshold)

get_params (self)

Returns parameters independent of input

Source code in albumentations/augmentations/transforms.py
Python
def get_params(self) -> Dict[str, Any]:
    """Sample the kernel size, sigma and alpha for one application.

    Returns:
        A dict with ``ksize`` (zero or an odd integer), ``sigma`` and ``alpha``.
    """
    ksize = random.randrange(self.blur_limit[0], self.blur_limit[1] + 1, 2)
    # Stepping by 2 from a lower bound of 0 lands on even sizes (2, 4, ...), which are
    # not valid kernel sizes per the documented "zero or odd" contract. Move such
    # draws to the next odd size; 0 is kept and draws from an odd lower bound are
    # unaffected.
    if ksize != 0 and ksize % 2 == 0:
        ksize += 1
    return {
        "ksize": ksize,
        "sigma": random.uniform(*self.sigma_limit),
        "alpha": random.uniform(*self.alpha),
    }

get_transform_init_args_names (self)

Returns names of arguments that are used in init method of the transform

Source code in albumentations/augmentations/transforms.py
Python
def get_transform_init_args_names(self) -> Tuple[str, str, str, str]:
    """Return the names of the constructor arguments stored on this transform."""
    init_arg_names = ("blur_limit", "sigma_limit", "alpha", "threshold")
    return init_arg_names