diff --git a/scenedetect.cfg b/scenedetect.cfg index e2c540ad..782f32da 100644 --- a/scenedetect.cfg +++ b/scenedetect.cfg @@ -143,10 +143,10 @@ -# Threshold value (float) that the calculated difference between subsequent -# histograms must exceed to trigger a new scene. +# Threshold value (float, 0.0 to 1.0): a new scene is triggered when the correlation +# between subsequent luma histograms is at or below this value. -#threshold = 20000.0 +#threshold = 0.95 +# Number of bins to use for the histogram (1 to 256). +#bins = 256 -# Number of bits to use for image quantization before binning. -#bits = 4 # Minimum length of a given scene (overrides [global] option). #min-scene-len = 0.6s diff --git a/scenedetect/__init__.py b/scenedetect/__init__.py index f928ad4e..ad26d9c6 100644 --- a/scenedetect/__init__.py +++ b/scenedetect/__init__.py @@ -36,7 +36,7 @@ from scenedetect.video_stream import VideoStream, VideoOpenFailure from scenedetect.video_splitter import split_video_ffmpeg, split_video_mkvmerge from scenedetect.scene_detector import SceneDetector -from scenedetect.detectors import ContentDetector, AdaptiveDetector, ThresholdDetector, HashDetector +from scenedetect.detectors import ContentDetector, AdaptiveDetector, ThresholdDetector, HistogramDetector, HashDetector from scenedetect.backends import (AVAILABLE_BACKENDS, VideoStreamCv2, VideoStreamAv, VideoStreamMoviePy, VideoCaptureAdapter) from scenedetect.stats_manager import StatsManager, StatsFileCorrupt diff --git a/scenedetect/_cli/__init__.py b/scenedetect/_cli/__init__.py index 5abcfe15..afac2161 100644 --- a/scenedetect/_cli/__init__.py +++ b/scenedetect/_cli/__init__.py @@ -730,17 +730,17 @@ def detect_threshold_command( type=click.FloatRange(CONFIG_MAP['detect-hist']['threshold'].min_val, CONFIG_MAP['detect-hist']['threshold'].max_val), default=None, - help='Threshold value (float) that the rgb histogram difference must exceed to trigger' + help='Threshold value (float) that the correlation between Y (luma) histograms must not exceed to trigger' ' a new scene. 
Refer to frame metric hist_diff in stats file.%s' % (USER_CONFIG.get_help_string('detect-hist', 'threshold'))) @click.option( - '--bits', + '--bins', '-b', metavar='NUM', type=click.INT, default=None, - help='The number of most significant figures to keep when quantizing the RGB color channels.%s' - % (USER_CONFIG.get_help_string("detect-hist", "bits"))) + help='The number of bins to use for the histogram calculation.%s' + % (USER_CONFIG.get_help_string("detect-hist", "bins"))) @click.option( '--min-scene-len', '-m', @@ -753,7 +753,7 @@ def detect_threshold_command( ('' if USER_CONFIG.is_default('detect-hist', 'min-scene-len') else USER_CONFIG.get_help_string( 'detect-hist', 'min-scene-len'))) @click.pass_context -def detect_hist_command(ctx: click.Context, threshold: Optional[float], bits: Optional[int], +def detect_hist_command(ctx: click.Context, threshold: Optional[float], bins: Optional[int], min_scene_len: Optional[str]): - """Perform detection of scenes by comparing differences in the RGB histograms of adjacent frames. + """Perform detection of scenes by comparing the luma (Y) histograms of adjacent frames. 
@@ -762,13 +762,13 @@ def detect_hist_command(ctx: click.Context, threshold: Optional[float], bits: Op detect-hist - detect-hist --threshold 20000.0 + detect-hist --threshold 0.8 --bins 128 """ assert isinstance(ctx.obj, CliContext) assert isinstance(ctx.obj, CliContext) detector_args = ctx.obj.get_detect_hist_params( - threshold=threshold, bits=bits, min_scene_len=min_scene_len) + threshold=threshold, bins=bins, min_scene_len=min_scene_len) logger.debug('Adding detector: HistogramDetector(%s)', detector_args) ctx.obj.add_detector(HistogramDetector(**detector_args)) diff --git a/scenedetect/_cli/config.py b/scenedetect/_cli/config.py index 95fb0b9f..d8e38867 100644 --- a/scenedetect/_cli/config.py +++ b/scenedetect/_cli/config.py @@ -278,9 +278,9 @@ def format(self, timecode: FrameTimecode) -> str: 'threshold': RangeValue(12.0, min_val=0.0, max_val=255.0), }, 'detect-hist': { - 'bits': 4, 'min-scene-len': TimecodeValue(0), - 'threshold': RangeValue(20000.0, min_val=0.0, max_val=10000000000.0), + 'threshold': RangeValue(0.95, min_val=0.0, max_val=1.0), + 'bins': RangeValue(256, min_val=1, max_val=256), }, 'load-scenes': { 'start-col-name': 'Start Frame', diff --git a/scenedetect/_cli/context.py b/scenedetect/_cli/context.py index a5f7103e..56da6478 100644 --- a/scenedetect/_cli/context.py +++ b/scenedetect/_cli/context.py @@ -461,7 +461,7 @@ def handle_load_scenes(self, input: AnyStr, start_col_name: Optional[str]): self.load_scenes_column_name = self.config.get_value("load-scenes", "start-col-name", start_col_name) - def get_detect_hist_params(self, threshold: Optional[float], bits: Optional[int], + def get_detect_hist_params(self, threshold: Optional[float], bins: Optional[int], min_scene_len: Optional[str]) -> Dict[str, Any]: """Handle detect-hist command options and return dict to construct one with.""" self._ensure_input_open() @@ -475,7 +475,7 @@ def get_detect_hist_params(self, threshold: Optional[float], bits: Optional[int] min_scene_len = 
self.config.get_value("detect-hist", "min-scene-len") min_scene_len = parse_timecode(min_scene_len, self.video_stream.frame_rate).frame_num return { - 'bits': self.config.get_value("detect-hist", "bits", bits), + 'bins': self.config.get_value("detect-hist", "bins", bins), 'min_scene_len': min_scene_len, 'threshold': self.config.get_value("detect-hist", "threshold", threshold), } diff --git a/scenedetect/detectors/histogram_detector.py b/scenedetect/detectors/histogram_detector.py index 937b7e13..e6ba7f9b 100644 --- a/scenedetect/detectors/histogram_detector.py +++ b/scenedetect/detectors/histogram_detector.py @@ -18,6 +18,7 @@ from typing import List +import cv2 import numpy # PySceneDetect Library Imports @@ -30,29 +31,28 @@ class HistogramDetector(SceneDetector): METRIC_KEYS = ['hist_diff'] - def __init__(self, threshold: float = 20000.0, bits: int = 4, min_scene_len: int = 15): + def __init__(self, threshold: float = 0.95, bins: int = 256, min_scene_len: int = 15): """ Arguments: threshold: Threshold value (float) that the calculated difference between subsequent - histograms must exceed to trigger a new scene. - bits: Number of most significant bits to keep of the pixel values. Most videos and - images are 8-bit rgb (0-255) and the default is to just keep the 4 most siginificant - bits. This compresses the 3*8bit (24bit) image down to 3*4bits (12bits). This makes - quantizing the rgb histogram a bit easier and comparisons more meaningful. + histograms must not exceed to trigger a new scene (the value compared is a correlation score). + The threshold value should be between 0 and 1 (perfect positive correlation, identical histograms). + Values close to 1 indicate very similar frames, while lower values suggest changes. + bins: Number of bins to use for the histogram. min_scene_len: Minimum length of any scene. 
""" super().__init__() self._threshold = threshold - self._bits = bits + self._bins = bins self._min_scene_len = min_scene_len - self._hist_bins = range(2**(3 * self._bits)) self._last_hist = None self._last_scene_cut = None def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> List[int]: - """First, compress the image according to the self.bits value, then build a histogram for - the input frame. Afterward, compare against the previously analyzed frame and check if the - difference is large enough to trigger a cut. + """Computes the histogram of the luma channel of the frame image and compares it with the + histogram of the luma channel of the previous frame. If the correlation between the histograms + is less than or equal to the threshold, a scene cut is detected (subject to the minimum scene length). + The comparison uses the correlation metric (cv2.HISTCMP_CORREL). Arguments: frame_num: Frame number of frame that is being passed. @@ -77,25 +77,24 @@ def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> List[int]: if not self._last_scene_cut: self._last_scene_cut = frame_num - # Quantize the image and separate the color channels - quantized_imgs = self._quantize_frame(frame_img=frame_img, bits=self._bits) - - # Perform bit shifting operations and bitwise combine color channels into one array - composite_img = self._shift_bits(quantized_imgs=quantized_imgs, bits=self._bits) - - # Create the histogram with a bin for every rgb value - hist, _ = numpy.histogram(composite_img, bins=self._hist_bins) + hist = self.calculate_histogram(frame_img, bins = self._bins) # We can only start detecting once we have a frame to compare with. 
if self._last_hist is not None: + #TODO: We can have EMA of histograms to make it more robust + # ema_hist = alpha * hist + (1 - alpha) * ema_hist + # Compute histogram difference between frames - hist_diff = numpy.sum(numpy.fabs(self._last_hist - hist)) + hist_diff = cv2.compareHist(self._last_hist, hist, cv2.HISTCMP_CORREL) # Check if a new scene should be triggered - - # TODO(#53): We should probably normalize the threshold based on the frame size, as - # larger images will have more pixels in each bin. - if hist_diff >= self._threshold and ((frame_num - self._last_scene_cut) + # Set a correlation threshold to determine scene changes. + # The threshold value should be between -1 (perfect negative correlation, not applicable here) + # and 1 (perfect positive correlation, identical histograms). + # Values close to 1 indicate very similar frames, while lower values suggest changes. + # Example: If `_threshold` is set to 0.8, it implies that only changes resulting in a correlation + # less than 0.8 between histograms will be considered significant enough to denote a scene change. + if hist_diff <= self._threshold and ((frame_num - self._last_scene_cut) >= self._min_scene_len): cut_list.append(frame_num) self._last_scene_cut = frame_num @@ -108,82 +107,52 @@ def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> List[int]: return cut_list - def _quantize_frame(self, frame_img, bits): - """Quantizes the image based on the number of most significant figures to be preserved. - - Arguments: - frame_img: The 8-bit rgb image of the frame being analyzed. - bits: The number of most significant bits to keep during quantization. - - Returns: - [red_img, green_img, blue_img]: - The three separated color channels of the frame image that have been quantized. 
+ def calculate_histogram(self, + frame_img: numpy.ndarray, + bins: int = 256, + normalize: bool = True) -> numpy.ndarray: """ - # First, find the value of the number of most significant bits, padding with zeroes - bit_value = int(bin(2**bits - 1).ljust(10, '0'), 2) - - # Separate R, G, and B color channels and cast to int for easier bitwise operations - red_img = frame_img[:, :, 0].astype(int) - green_img = frame_img[:, :, 1].astype(int) - blue_img = frame_img[:, :, 2].astype(int) - - # Quantize the frame images - red_img = red_img & bit_value - green_img = green_img & bit_value - blue_img = blue_img & bit_value - - return [red_img, green_img, blue_img] - - def _shift_bits(self, quantized_imgs, bits): - """Takes care of the bit shifting operations to combine the RGB color - channels into a single array. - - Arguments: - quantized_imgs: A list of the three quantized images of the RGB color channels - respectively. - bits: The number of most significant bits to use for quantizing the image. + Calculates and optionally normalizes the histogram of the luma (Y) channel of an image converted from BGR to YUV color space. + + This function extracts the Y channel from the given BGR image, computes its histogram with the specified number of bins, + and optionally normalizes this histogram to unit L2 norm (the cv2.normalize default). + + Args: + ----- + frame_img : np.ndarray + The input image in BGR color space, assumed to have shape (height, width, 3) + where the last dimension represents the BGR channels. + bins : int, optional (default=256) + The number of bins to use for the histogram. + normalize : bool, optional (default=True) + A boolean flag that determines whether the histogram should be normalized + to unit L2 norm (cv2.normalize with its default alpha=1, NORM_L2). Returns: - composite_img: The resulting array after all bitwise operations. + -------- + np.ndarray + A 1D numpy array of length equal to `bins`, representing the histogram of the luma channel. 
+ Each element in the array represents the count of pixels whose luma falls in that bin. + If normalized, these values are the counts scaled so the vector has unit L2 norm. + + Examples: + --------- + >>> img = cv2.imread('path_to_image.jpg') + >>> hist = HistogramDetector().calculate_histogram(img, bins=256, normalize=True) + >>> print(hist.shape) + (256,) """ - # First, figure out how much each shift needs to be - blue_shift = 8 - bits - green_shift = 8 - 2 * bits - red_shift = 8 - 3 * bits - - # Separate our color channels for ease - red_img = quantized_imgs[0] - green_img = quantized_imgs[1] - blue_img = quantized_imgs[2] - - # Perform the bit shifting for each color - red_img = self._shift_images(img=red_img, img_shift=red_shift) - green_img = self._shift_images(img=green_img, img_shift=green_shift) - blue_img = self._shift_images(img=blue_img, img_shift=blue_shift) - - # Join our rgb arrays together - composite_img = numpy.bitwise_or(red_img, numpy.bitwise_or(green_img, blue_img)) + # Extract Luma channel from the frame image + y, _, _ = cv2.split(cv2.cvtColor(frame_img, cv2.COLOR_BGR2YUV)) - return composite_img - - def _shift_images(self, img, img_shift): - """Do bitwise shifting operations for a color channel image checking for shift direction. - - Arguments: - img: A quantized image of a single color channel - img_shift: How many bits to shift the values of img. If the value is negative, the shift - direction is to the left and 8 is added to make it a positive value. + # Histogram of the luma channel over the 8-bit range [0, 256) + hist = cv2.calcHist([y], [0], None, [bins], [0, 256]) - Returns: - shifted_img: The bitwise shifted image. 
- """ - if img_shift < 0: - img_shift += 8 - shifted_img = numpy.left_shift(img, img_shift) - else: - shifted_img = numpy.right_shift(img, img_shift) + if normalize: + # Scale to unit L2 norm (cv2.normalize defaults) + hist = cv2.normalize(hist, hist) - return shifted_img + return hist.flatten() def is_processing_required(self, frame_num: int) -> bool: return True diff --git a/website/pages/api.md b/website/pages/api.md index b3cb79a9..3b09d398 100644 --- a/website/pages/api.md +++ b/website/pages/api.md @@ -27,7 +27,7 @@ The threshold-based scene detector (`detect-threshold`) is how most traditional ## Histogram Detector -The color histogram detector uses color information to detect fast cuts. The input video for this detector must be in 8-bit color. The detection algorithm consists of separating the three RGB color channels and then quantizing them by eliminating all but the given number of most significant bits (`--bits/-b`). The resulting quantized color channels are then bit shifted and joined together into a new, composite image. A histogram is then constructed from the pixel values in the new, composite image. This histogram is compared element-wise with the histogram from the previous frame and if the total difference between the two adjacent histograms exceeds the given threshold (`--threshold/-t`), then a new scene is triggered. +The scene change detection algorithm uses histograms of the Y channel in the YCbCr color space to detect scene changes, which helps mitigate issues caused by lighting variations. Each frame of the video is converted from its original color space to the YCbCr color space. The Y channel, which represents luminance, is extracted from the YCbCr color space. This helps in focusing on intensity variations rather than color variations. A histogram of the Y channel is computed using the specified number of bins (`--bins/-b`). The histogram is normalized to ensure that it can be consistently compared with histograms from other frames. 
The normalized histogram of the current frame is compared with the normalized histogram of the previous frame using the correlation method (`cv2.HISTCMP_CORREL`). A scene change is detected if the correlation between the histograms of consecutive frames is at or below the specified threshold (`--threshold/-t`). This indicates a significant change in luminance, suggesting a scene change. ## Perceptual Hash Detector