diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index d625d101..1a12e4dd 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -50,7 +50,7 @@ For image shape and channels, please use `utils.ImageShape` and `utils.ColorChan ### Image color format and channels -To avoid image shape mismatch issues, and to keep code simpler, we standardize the image color format to BGRA. This should always be done early in the pipeline, so whatever functionality takes care of obtaining an image should also ensure its color format. You can do so with `cv2.cvtColor` (ie: `cv2.cvtColor(image, cv2.COLOR_RGBA2BGRA)` or `cv2.cvtColor(image, cv2.COLOR_BGR2BGRA)`). +To avoid image shape mismatch issues, reduce memory usage, and keep code simpler, we standardize the image color format to BGR. This should always be done early in the pipeline, so whatever functionality takes care of obtaining an image should also ensure its color format. You can do so with `cv2.cvtColor` (e.g., `cv2.cvtColor(image, cv2.COLOR_RGB2BGR)` or `cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)`). Split Images' transparency is handled by first extracting a binary mask into `AutoSplitImage.mask`, before the alpha channel is dropped.
### Split-specific setting overrides diff --git a/src/AutoSplit.py b/src/AutoSplit.py index 0a6b6b24..b83949dc 100755 --- a/src/AutoSplit.py +++ b/src/AutoSplit.py @@ -1087,7 +1087,7 @@ def __update_split_image(self, specific_image: AutoSplitImage | None = None): text = "\nor\n".join(self.split_image.texts) self.current_split_image.setText(f"Looking for OCR text:\n{text}") elif is_valid_image(self.split_image.byte_array): - set_preview_image(self.current_split_image, self.split_image.byte_array) + set_preview_image(self.current_split_image, self.split_image.get_preview_image()) self.current_image_file_label.setText(self.split_image.filename) self.table_current_image_threshold_label.setText( diff --git a/src/AutoSplitImage.py b/src/AutoSplitImage.py index 6b04852f..aed16826 100644 --- a/src/AutoSplitImage.py +++ b/src/AutoSplitImage.py @@ -16,7 +16,7 @@ get_comparison_method_by_index, ) from utils import ( - BGR_CHANNEL_COUNT, + BGRA_CHANNEL_COUNT, MAXBYTE, TESSERACT_PATH, ColorChannel, @@ -187,16 +187,31 @@ def __read_image_bytes(self, path: str): interpolation=cv2.INTER_NEAREST, ) - # Mask based on adaptively resized, nearest neighbor interpolated split image + # Mask based on adaptively resized, nearest neighbor interpolated split image. + # This must happen before dropping the alpha channel below. 
self.mask = cv2.inRange(image, MASK_LOWER_BOUND, MASK_UPPER_BOUND) + image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR) else: image = cv2.resize(image, COMPARISON_RESIZE, interpolation=cv2.INTER_NEAREST) - # Add Alpha channel if missing - if image.shape[ImageShape.Channels] == BGR_CHANNEL_COUNT: - image = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA) + # Captures are standardized to BGR, so drop the alpha channel if present + if image.shape[ImageShape.Channels] == BGRA_CHANNEL_COUNT: + image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR) self.byte_array = image + def get_preview_image(self): + """ + The comparison `byte_array` is stored as BGR, but the preview should + show the user the transparency. Re-attach the mask as the alpha channel. + """ + if self.byte_array is None: + return None + if self.mask is None: + return self.byte_array + preview = cv2.cvtColor(self.byte_array, cv2.COLOR_BGR2BGRA) + preview[:, :, ColorChannel.Alpha] = self.mask + return preview + def check_flag(self, flag: int): return self.flags & flag == flag diff --git a/src/capture_method/BitBltCaptureMethod.py b/src/capture_method/BitBltCaptureMethod.py index 31e6b180..0588b812 100644 --- a/src/capture_method/BitBltCaptureMethod.py +++ b/src/capture_method/BitBltCaptureMethod.py @@ -5,6 +5,7 @@ import ctypes from typing import TYPE_CHECKING, override +import cv2 import numpy as np import pywintypes import win32con @@ -84,7 +85,8 @@ def get_frame(self) -> MatLike | None: try_delete_dc(compatible_dc) win32gui.ReleaseDC(hwnd, window_dc) win32gui.DeleteObject(bitmap.GetHandle()) - return image + # The OS hands us a native BGRA buffer; drop the unused alpha + return None if image is None else cv2.cvtColor(image, cv2.COLOR_BGRA2BGR) @override def recover_window(self, captured_window_title: str): diff --git a/src/capture_method/CaptureMethodBase.py b/src/capture_method/CaptureMethodBase.py index 3a937e7e..3331b589 100644 --- a/src/capture_method/CaptureMethodBase.py +++ b/src/capture_method/CaptureMethodBase.py 
@@ -33,7 +33,7 @@ def get_frame(self) -> MatLike | None: # noqa: PLR6301 Captures an image of the region for a window matching the given parameters of the bounding box. - @return: The image of the region in the window in BGRA format + @return: The image of the region in the window in BGR format """ return None diff --git a/src/capture_method/DesktopDuplicationCaptureMethod.py b/src/capture_method/DesktopDuplicationCaptureMethod.py index fc03206b..874b5a16 100644 --- a/src/capture_method/DesktopDuplicationCaptureMethod.py +++ b/src/capture_method/DesktopDuplicationCaptureMethod.py @@ -71,4 +71,4 @@ def get_frame(self): screenshot = self._desktop_duplication.screenshot((left, top, right, bottom)) if screenshot is None: return None - return cv2.cvtColor(screenshot, cv2.COLOR_RGB2BGRA) + return cv2.cvtColor(screenshot, cv2.COLOR_RGB2BGR) diff --git a/src/capture_method/ScrotCaptureMethod.py b/src/capture_method/ScrotCaptureMethod.py index 834a9686..9568dda0 100644 --- a/src/capture_method/ScrotCaptureMethod.py +++ b/src/capture_method/ScrotCaptureMethod.py @@ -34,7 +34,7 @@ def _scrot_screenshot(x: int, y: int, width: int, height: int): "-z", screenshot_file, )) - return imread(screenshot_file, cv2.IMREAD_COLOR_RGB) + return imread(screenshot_file, cv2.IMREAD_COLOR_BGR) except subprocess.CalledProcessError: # This can happen when trying to capture a region OOB # scrot is rude and prints directly to TTY, no stderr :/ @@ -75,7 +75,8 @@ def get_frame(self): ) if not is_valid_image(image): return None - return cv2.cvtColor(image, cv2.COLOR_RGB2BGRA) + # `imread` with `IMREAD_COLOR_BGR` already gives us BGR + return image @override def recover_window(self, captured_window_title: str): diff --git a/src/capture_method/VideoCaptureDeviceCaptureMethod.py b/src/capture_method/VideoCaptureDeviceCaptureMethod.py index d824802d..c53d6ce6 100644 --- a/src/capture_method/VideoCaptureDeviceCaptureMethod.py +++ b/src/capture_method/VideoCaptureDeviceCaptureMethod.py @@ -143,7 +143,8 @@ 
def get_frame(self): y : y + selection["height"], x : x + selection["width"], ] - self.last_converted_frame = cv2.cvtColor(image, cv2.COLOR_BGR2BGRA) + # `cv2.VideoCapture` frames are already BGR + self.last_converted_frame = image return self.last_converted_frame @override diff --git a/src/capture_method/WindowsGraphicsCaptureMethod.py b/src/capture_method/WindowsGraphicsCaptureMethod.py index 72e2a175..7962f4a8 100644 --- a/src/capture_method/WindowsGraphicsCaptureMethod.py +++ b/src/capture_method/WindowsGraphicsCaptureMethod.py @@ -7,6 +7,7 @@ import asyncio from typing import TYPE_CHECKING, cast, override +import cv2 import numpy as np import win32api import win32gui @@ -161,6 +162,8 @@ def get_frame(self) -> MatLike | None: selection["y"] : selection["y"] + selection["height"], selection["x"] : selection["x"] + selection["width"], ] + # The OS hands us a native BGRA buffer; drop the unused alpha + image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR) self.last_converted_frame = image return image diff --git a/src/capture_method/XcbCaptureMethod.py b/src/capture_method/XcbCaptureMethod.py index 901df671..a5cfbfb5 100644 --- a/src/capture_method/XcbCaptureMethod.py +++ b/src/capture_method/XcbCaptureMethod.py @@ -68,7 +68,7 @@ def get_frame(self): image = np.array(image) if not is_valid_image(image): return None - return cv2.cvtColor(image, cv2.COLOR_RGB2BGRA) + return cv2.cvtColor(image, cv2.COLOR_RGB2BGR) @override def recover_window(self, captured_window_title: str): diff --git a/src/compare.py b/src/compare.py index 574c9c8f..c6eead1a 100644 --- a/src/compare.py +++ b/src/compare.py @@ -7,6 +7,7 @@ import numpy as np from utils import ( + BGR_CHANNEL_COUNT, BGRA_CHANNEL_COUNT, MAXBYTE, ColorChannel, @@ -22,7 +23,7 @@ CHANNELS = (ColorChannel.Red.value, ColorChannel.Green.value, ColorChannel.Blue.value) HISTOGRAM_SIZE = (8, 8, 8) RANGES = (0, MAXRANGE, 0, MAXRANGE, 0, MAXRANGE) -MASK_SIZE_MULTIPLIER = ColorChannel.Alpha * MAXBYTE * MAXBYTE +MASK_SIZE_MULTIPLIER = 
BGR_CHANNEL_COUNT * MAXBYTE * MAXBYTE MAX_VALUE = 1.0 CV2_PHASH_SIZE = 8 diff --git a/src/region_selection.py b/src/region_selection.py index 121ffafc..5452dc40 100644 --- a/src/region_selection.py +++ b/src/region_selection.py @@ -13,7 +13,7 @@ import error_messages from capture_method import Region from utils import ( - BGR_CHANNEL_COUNT, + BGRA_CHANNEL_COUNT, MAXBYTE, ImageShape, auto_split_directory, @@ -219,9 +219,9 @@ def align_region(autosplit: AutoSplit): return template = imread(template_filename, cv2.IMREAD_UNCHANGED) - # Add alpha channel to template if it's missing. - if template.shape[ImageShape.Channels] == BGR_CHANNEL_COUNT: - template = cv2.cvtColor(template, cv2.COLOR_BGR2BGRA) + # Captures are standardized to BGR, so drop the template's alpha channel if present + if template.shape[ImageShape.Channels] == BGRA_CHANNEL_COUNT: + template = cv2.cvtColor(template, cv2.COLOR_BGRA2BGR) # Validate template is a valid image file if not is_valid_image(template):