Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ For image shape and channels, please use `utils.ImageShape` and `utils.ColorChan

### Image color format and channels

To avoid image shape mismatch issues, and to keep code simpler, we standardize the image color format to BGRA. This should always be done early in the pipeline, so whatever functionality takes care of obtaining an image should also ensure its color format. You can do so with `cv2.cvtColor` (ie: `cv2.cvtColor(image, cv2.COLOR_RGBA2BGRA)` or `cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)`).
To avoid image shape mismatch issues, reduce memory usage, and keep code simpler, we standardize the image color format to BGR. This should always be done early in the pipeline, so whatever functionality takes care of obtaining an image should also ensure its color format. You can do so with `cv2.cvtColor` (e.g. `cv2.cvtColor(image, cv2.COLOR_RGB2BGR)` or `cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)`). Split Images' transparency is handled by first extracting a binary mask into `AutoSplitImage.mask`.

### Split-specific setting overrides

Expand Down
2 changes: 1 addition & 1 deletion src/AutoSplit.py
Original file line number Diff line number Diff line change
Expand Up @@ -1087,7 +1087,7 @@ def __update_split_image(self, specific_image: AutoSplitImage | None = None):
text = "\nor\n".join(self.split_image.texts)
self.current_split_image.setText(f"Looking for OCR text:\n{text}")
elif is_valid_image(self.split_image.byte_array):
set_preview_image(self.current_split_image, self.split_image.byte_array)
set_preview_image(self.current_split_image, self.split_image.get_preview_image())

self.current_image_file_label.setText(self.split_image.filename)
self.table_current_image_threshold_label.setText(
Expand Down
25 changes: 20 additions & 5 deletions src/AutoSplitImage.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
get_comparison_method_by_index,
)
from utils import (
BGR_CHANNEL_COUNT,
BGRA_CHANNEL_COUNT,
MAXBYTE,
TESSERACT_PATH,
ColorChannel,
Expand Down Expand Up @@ -187,16 +187,31 @@ def __read_image_bytes(self, path: str):
interpolation=cv2.INTER_NEAREST,
)

# Mask based on adaptively resized, nearest neighbor interpolated split image
# Mask based on adaptively resized, nearest neighbor interpolated split image.
# This must happen before dropping the alpha channel below.
self.mask = cv2.inRange(image, MASK_LOWER_BOUND, MASK_UPPER_BOUND)
image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)
else:
image = cv2.resize(image, COMPARISON_RESIZE, interpolation=cv2.INTER_NEAREST)
# Add Alpha channel if missing
if image.shape[ImageShape.Channels] == BGR_CHANNEL_COUNT:
image = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)
# Captures are standardized to BGR, so drop the alpha channel if present
if image.shape[ImageShape.Channels] == BGRA_CHANNEL_COUNT:
image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)

self.byte_array = image

def get_preview_image(self):
    """
    Build the image to display in the split image preview.

    `byte_array` holds the comparison image as BGR, which carries no
    transparency. When a mask is available, it is written into the alpha
    channel of a BGRA conversion so the preview shows the transparency.

    @return: `None` when there is no `byte_array`, the `byte_array` itself
    when there is no mask, otherwise the BGRA conversion with the mask as alpha.
    """
    bgr_image = self.byte_array
    if bgr_image is None:
        return None

    alpha_mask = self.mask
    if alpha_mask is None:
        # No transparency to re-attach, the comparison image is shown as-is
        return bgr_image

    bgra_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2BGRA)
    bgra_image[..., ColorChannel.Alpha] = alpha_mask
    return bgra_image

def check_flag(self, flag: int):
    """
    Tell whether every bit of `flag` is set in `self.flags`.

    @param flag: The bit mask to look for.
    @return: `True` only when all the bits of `flag` are present in `self.flags`.
    """
    matching_bits = self.flags & flag
    return matching_bits == flag

Expand Down
4 changes: 3 additions & 1 deletion src/capture_method/BitBltCaptureMethod.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import ctypes
from typing import TYPE_CHECKING, override

import cv2
import numpy as np
import pywintypes
import win32con
Expand Down Expand Up @@ -84,7 +85,8 @@ def get_frame(self) -> MatLike | None:
try_delete_dc(compatible_dc)
win32gui.ReleaseDC(hwnd, window_dc)
win32gui.DeleteObject(bitmap.GetHandle())
return image
# The OS hands us a native BGRA buffer; drop the unused alpha
return None if image is None else cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)

@override
def recover_window(self, captured_window_title: str):
Expand Down
2 changes: 1 addition & 1 deletion src/capture_method/CaptureMethodBase.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def get_frame(self) -> MatLike | None: # noqa: PLR6301
Captures an image of the region for a window matching the given
parameters of the bounding box.

@return: The image of the region in the window in BGRA format
@return: The image of the region in the window in BGR format
"""
return None

Expand Down
2 changes: 1 addition & 1 deletion src/capture_method/DesktopDuplicationCaptureMethod.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,4 @@ def get_frame(self):
screenshot = self._desktop_duplication.screenshot((left, top, right, bottom))
if screenshot is None:
return None
return cv2.cvtColor(screenshot, cv2.COLOR_RGB2BGRA)
return cv2.cvtColor(screenshot, cv2.COLOR_RGB2BGR)
5 changes: 3 additions & 2 deletions src/capture_method/ScrotCaptureMethod.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def _scrot_screenshot(x: int, y: int, width: int, height: int):
"-z",
screenshot_file,
))
return imread(screenshot_file, cv2.IMREAD_COLOR_RGB)
return imread(screenshot_file, cv2.IMREAD_COLOR_BGR)
except subprocess.CalledProcessError:
# This can happen when trying to capture a region OOB
# scrot is rude and prints directly to TTY, no stderr :/
Expand Down Expand Up @@ -75,7 +75,8 @@ def get_frame(self):
)
if not is_valid_image(image):
return None
return cv2.cvtColor(image, cv2.COLOR_RGB2BGRA)
# `imread` with `IMREAD_COLOR_BGR` already gives us BGR
return image

@override
def recover_window(self, captured_window_title: str):
Expand Down
3 changes: 2 additions & 1 deletion src/capture_method/VideoCaptureDeviceCaptureMethod.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,8 @@ def get_frame(self):
y : y + selection["height"],
x : x + selection["width"],
]
self.last_converted_frame = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)
# `cv2.VideoCapture` frames are already BGR
self.last_converted_frame = image
return self.last_converted_frame

@override
Expand Down
3 changes: 3 additions & 0 deletions src/capture_method/WindowsGraphicsCaptureMethod.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import asyncio
from typing import TYPE_CHECKING, cast, override

import cv2
import numpy as np
import win32api
import win32gui
Expand Down Expand Up @@ -161,6 +162,8 @@ def get_frame(self) -> MatLike | None:
selection["y"] : selection["y"] + selection["height"],
selection["x"] : selection["x"] + selection["width"],
]
# The OS hands us a native BGRA buffer; drop the unused alpha
image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)
self.last_converted_frame = image
return image

Expand Down
2 changes: 1 addition & 1 deletion src/capture_method/XcbCaptureMethod.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def get_frame(self):
image = np.array(image)
if not is_valid_image(image):
return None
return cv2.cvtColor(image, cv2.COLOR_RGB2BGRA)
return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

@override
def recover_window(self, captured_window_title: str):
Expand Down
3 changes: 2 additions & 1 deletion src/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import numpy as np

from utils import (
BGR_CHANNEL_COUNT,
BGRA_CHANNEL_COUNT,
MAXBYTE,
ColorChannel,
Expand All @@ -22,7 +23,7 @@
CHANNELS = (ColorChannel.Red.value, ColorChannel.Green.value, ColorChannel.Blue.value)
HISTOGRAM_SIZE = (8, 8, 8)
RANGES = (0, MAXRANGE, 0, MAXRANGE, 0, MAXRANGE)
MASK_SIZE_MULTIPLIER = ColorChannel.Alpha * MAXBYTE * MAXBYTE
MASK_SIZE_MULTIPLIER = BGR_CHANNEL_COUNT * MAXBYTE * MAXBYTE
MAX_VALUE = 1.0
CV2_PHASH_SIZE = 8

Expand Down
8 changes: 4 additions & 4 deletions src/region_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import error_messages
from capture_method import Region
from utils import (
BGR_CHANNEL_COUNT,
BGRA_CHANNEL_COUNT,
MAXBYTE,
ImageShape,
auto_split_directory,
Expand Down Expand Up @@ -219,9 +219,9 @@ def align_region(autosplit: AutoSplit):
return

template = imread(template_filename, cv2.IMREAD_UNCHANGED)
# Add alpha channel to template if it's missing.
if template.shape[ImageShape.Channels] == BGR_CHANNEL_COUNT:
template = cv2.cvtColor(template, cv2.COLOR_BGR2BGRA)
# Captures are standardized to BGR, so drop the template's alpha channel if present
if template.shape[ImageShape.Channels] == BGRA_CHANNEL_COUNT:
template = cv2.cvtColor(template, cv2.COLOR_BGRA2BGR)

# Validate template is a valid image file
if not is_valid_image(template):
Expand Down
Loading