5 changes: 2 additions & 3 deletions scenedetect.cfg
@@ -143,10 +143,9 @@

 # Threshold value (float) that the calculated difference between subsequent
 # histograms must exceed to trigger a new scene.
-#threshold = 20000.0
+#threshold = 0.95
+#bins = 256
 
-# Number of bits to use for image quantization before binning.
-#bits = 4
 
 # Minimum length of a given scene (overrides [global] option).
 #min-scene-len = 0.6s
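For reference, a minimal `[detect-hist]` section exercising the new options might look like the following. Values are illustrative, not recommendations, and the section name is assumed to match the command name as elsewhere in scenedetect.cfg:

```ini
[detect-hist]
# Trigger a cut when the Y-channel histogram correlation drops below this value (0.0-1.0).
threshold = 0.95
# Number of histogram bins (1-256).
bins = 256
# Minimum scene length (overrides the [global] option).
min-scene-len = 0.6s
```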
2 changes: 1 addition & 1 deletion scenedetect/__init__.py
@@ -36,7 +36,7 @@
 from scenedetect.video_stream import VideoStream, VideoOpenFailure
 from scenedetect.video_splitter import split_video_ffmpeg, split_video_mkvmerge
 from scenedetect.scene_detector import SceneDetector
-from scenedetect.detectors import ContentDetector, AdaptiveDetector, ThresholdDetector, HashDetector
+from scenedetect.detectors import ContentDetector, AdaptiveDetector, ThresholdDetector, HistogramDetector, HashDetector
 from scenedetect.backends import (AVAILABLE_BACKENDS, VideoStreamCv2, VideoStreamAv,
                                   VideoStreamMoviePy, VideoCaptureAdapter)
 from scenedetect.stats_manager import StatsManager, StatsFileCorrupt
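Since `HistogramDetector` is now exported from the top-level package, it can be used directly with the Python API. A quick sketch (the video path is hypothetical; assumes the v0.6-style `detect()` helper):

```python
from scenedetect import detect, HistogramDetector

# Detect cuts using Y-channel histogram correlation (defaults shown explicitly).
scenes = detect('my_video.mp4', HistogramDetector(threshold=0.95, bins=256))
for start, end in scenes:
    print(f'{start.get_timecode()} - {end.get_timecode()}')
```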
16 changes: 8 additions & 8 deletions scenedetect/_cli/__init__.py
@@ -730,17 +730,17 @@ def detect_threshold_command(
     type=click.FloatRange(CONFIG_MAP['detect-hist']['threshold'].min_val,
                           CONFIG_MAP['detect-hist']['threshold'].max_val),
     default=None,
-    help='Threshold value (float) that the rgb histogram difference must exceed to trigger'
+    help='Threshold value (float) below which the YCbCr histogram correlation must fall to trigger'
     ' a new scene. Refer to frame metric hist_diff in stats file.%s' %
     (USER_CONFIG.get_help_string('detect-hist', 'threshold')))
 @click.option(
-    '--bits',
+    '--bins',
     '-b',
     metavar='NUM',
     type=click.INT,
-    default=None,
-    help='The number of most significant figures to keep when quantizing the RGB color channels.%s'
-    % (USER_CONFIG.get_help_string("detect-hist", "bits")))
+    default=256,
+    help='The number of bins to use for the histogram calculation.%s'
+    % (USER_CONFIG.get_help_string("detect-hist", "bins")))
 @click.option(
     '--min-scene-len',
     '-m',
@@ -753,7 +753,7 @@ def detect_threshold_command(
     ('' if USER_CONFIG.is_default('detect-hist', 'min-scene-len') else USER_CONFIG.get_help_string(
         'detect-hist', 'min-scene-len')))
 @click.pass_context
-def detect_hist_command(ctx: click.Context, threshold: Optional[float], bits: Optional[int],
+def detect_hist_command(ctx: click.Context, threshold: Optional[float], bins: Optional[int],
                         min_scene_len: Optional[str]):
     """Perform detection of scenes by comparing differences in the RGB histograms of adjacent
     frames.
@@ -762,13 +762,13 @@ def detect_hist_command(ctx: click.Context, threshold: Optional[float], bits: Optional[int],

         detect-hist
 
-        detect-hist --threshold 20000.0
+        detect-hist --threshold 0.8 --bins 128
     """
     assert isinstance(ctx.obj, CliContext)
 
     detector_args = ctx.obj.get_detect_hist_params(
-        threshold=threshold, bits=bits, min_scene_len=min_scene_len)
+        threshold=threshold, bins=bins, min_scene_len=min_scene_len)
     logger.debug('Adding detector: HistogramDetector(%s)', detector_args)
     ctx.obj.add_detector(HistogramDetector(**detector_args))
 
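For illustration, the renamed option might be used from the command line as follows (input file name hypothetical):

```console
scenedetect -i my_video.mp4 detect-hist --threshold 0.90 --bins 128 list-scenes
```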
4 changes: 2 additions & 2 deletions scenedetect/_cli/config.py
@@ -278,9 +278,9 @@ def format(self, timecode: FrameTimecode) -> str:
         'threshold': RangeValue(12.0, min_val=0.0, max_val=255.0),
     },
     'detect-hist': {
-        'bits': 4,
         'min-scene-len': TimecodeValue(0),
-        'threshold': RangeValue(20000.0, min_val=0.0, max_val=10000000000.0),
+        'threshold': RangeValue(0.95, min_val=0.0, max_val=1.0),
+        'bins': RangeValue(256, min_val=1, max_val=256),
     },
     'load-scenes': {
         'start-col-name': 'Start Frame',
4 changes: 2 additions & 2 deletions scenedetect/_cli/context.py
@@ -461,7 +461,7 @@ def handle_load_scenes(self, input: AnyStr, start_col_name: Optional[str]):
         self.load_scenes_column_name = self.config.get_value("load-scenes", "start-col-name",
                                                              start_col_name)
 
-    def get_detect_hist_params(self, threshold: Optional[float], bits: Optional[int],
+    def get_detect_hist_params(self, threshold: Optional[float], bins: Optional[int],
                                min_scene_len: Optional[str]) -> Dict[str, Any]:
         """Handle detect-hist command options and return dict to construct one with."""
         self._ensure_input_open()
@@ -475,7 +475,7 @@ def get_detect_hist_params(self, threshold: Optional[float], bits: Optional[int],
             min_scene_len = self.config.get_value("detect-hist", "min-scene-len")
         min_scene_len = parse_timecode(min_scene_len, self.video_stream.frame_rate).frame_num
         return {
-            'bits': self.config.get_value("detect-hist", "bits", bits),
+            'bins': self.config.get_value("detect-hist", "bins", bins),
             'min_scene_len': min_scene_len,
             'threshold': self.config.get_value("detect-hist", "threshold", threshold),
         }
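The `get_value(...)` calls above resolve option values with the usual CLI-over-config precedence. A minimal sketch of that behavior (simplified; not the actual `ConfigRegistry` implementation, and `user_config` is a hypothetical stand-in for the parsed config file):

```python
def get_value(section, option, command_line_value=None):
    # An explicit command-line flag always wins.
    if command_line_value is not None:
        return command_line_value
    # Otherwise fall back to the user's config file...
    if option in user_config.get(section, {}):
        return user_config[section][option]
    # ...and finally to the built-in default from CONFIG_MAP.
    return CONFIG_MAP[section][option]
```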
155 changes: 62 additions & 93 deletions scenedetect/detectors/histogram_detector.py
@@ -18,6 +18,7 @@

 from typing import List
 
+import cv2
 import numpy
 
 # PySceneDetect Library Imports
@@ -30,29 +31,28 @@ class HistogramDetector(SceneDetector):

     METRIC_KEYS = ['hist_diff']
 
-    def __init__(self, threshold: float = 20000.0, bits: int = 4, min_scene_len: int = 15):
+    def __init__(self, threshold: float = 0.95, bins: int = 256, min_scene_len: int = 15):
         """
         Arguments:
             threshold: Threshold value (float) that the calculated difference between subsequent
-                histograms must exceed to trigger a new scene.
-            bits: Number of most significant bits to keep of the pixel values. Most videos and
-                images are 8-bit rgb (0-255) and the default is to just keep the 4 most significant
-                bits. This compresses the 3*8bit (24bit) image down to 3*4bits (12bits). This makes
-                quantizing the rgb histogram a bit easier and comparisons more meaningful.
+                histograms must fall below to trigger a new scene. The threshold should be between
+                0 and 1, where 1 is a perfect positive correlation (identical histograms). Values
+                close to 1 indicate very similar frames, while lower values suggest changes.
+            bins: Number of bins to use for the histogram.
             min_scene_len: Minimum length of any scene.
         """
         super().__init__()
         self._threshold = threshold
-        self._bits = bits
+        self._bins = bins
         self._min_scene_len = min_scene_len
-        self._hist_bins = range(2**(3 * self._bits))
         self._last_hist = None
         self._last_scene_cut = None
 
     def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> List[int]:
-        """First, compress the image according to the self.bits value, then build a histogram for
-        the input frame. Afterward, compare against the previously analyzed frame and check if the
-        difference is large enough to trigger a cut.
+        """Computes the histogram of the luma channel of the frame image and compares it with the
+        histogram of the previous frame. If the correlation between the two histograms falls below
+        the threshold, a scene cut is detected. Histogram similarity is computed using the
+        correlation metric (cv2.HISTCMP_CORREL).
 
         Arguments:
             frame_num: Frame number of frame that is being passed.
@@ -77,25 +77,24 @@ def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> List[int]:
         if not self._last_scene_cut:
             self._last_scene_cut = frame_num
 
-        # Quantize the image and separate the color channels
-        quantized_imgs = self._quantize_frame(frame_img=frame_img, bits=self._bits)
-
-        # Perform bit shifting operations and bitwise combine color channels into one array
-        composite_img = self._shift_bits(quantized_imgs=quantized_imgs, bits=self._bits)
-
-        # Create the histogram with a bin for every rgb value
-        hist, _ = numpy.histogram(composite_img, bins=self._hist_bins)
+        hist = self.calculate_histogram(frame_img, bins=self._bins)
 
         # We can only start detecting once we have a frame to compare with.
         if self._last_hist is not None:
+            # TODO: Keep an EMA of histograms to make detection more robust:
+            # ema_hist = alpha * hist + (1 - alpha) * ema_hist
+
             # Compute histogram difference between frames
-            hist_diff = numpy.sum(numpy.fabs(self._last_hist - hist))
+            hist_diff = cv2.compareHist(self._last_hist, hist, cv2.HISTCMP_CORREL)
 
             # Check if a new scene should be triggered
 
-            # TODO(#53): We should probably normalize the threshold based on the frame size, as
-            # larger images will have more pixels in each bin.
-            if hist_diff >= self._threshold and ((frame_num - self._last_scene_cut)
+            # hist_diff is a correlation between -1 (perfect negative correlation, not
+            # applicable here) and 1 (perfect positive correlation, identical histograms).
+            # Values close to 1 indicate very similar frames, while lower values suggest
+            # changes. For example, with a threshold of 0.8, only changes that drop the
+            # correlation below 0.8 are considered significant enough to denote a scene change.
+            if hist_diff <= self._threshold and ((frame_num - self._last_scene_cut)
                                                  >= self._min_scene_len):
                 cut_list.append(frame_num)
                 self._last_scene_cut = frame_num
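To make the threshold semantics concrete, here is a small standalone sketch (synthetic frames, not part of the PR) showing the scale of values `cv2.HISTCMP_CORREL` produces for identical versus very different frames:

```python
import cv2
import numpy

def luma_hist(frame_bgr, bins=256):
    # Mirror the detector: extract the Y channel and histogram it.
    y, _, _ = cv2.split(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2YUV))
    hist = cv2.calcHist([y], [0], None, [bins], [0, 256])
    return cv2.normalize(hist, hist).flatten()

dark = numpy.full((48, 64, 3), 30, dtype=numpy.uint8)     # uniformly dark frame
bright = numpy.full((48, 64, 3), 220, dtype=numpy.uint8)  # uniformly bright frame

print(cv2.compareHist(luma_hist(dark), luma_hist(dark), cv2.HISTCMP_CORREL))    # 1.0
print(cv2.compareHist(luma_hist(dark), luma_hist(bright), cv2.HISTCMP_CORREL))  # near 0
```

With the default threshold of 0.95, the first comparison would never trigger a cut, while the second falls far below it and would.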
@@ -108,82 +107,52 @@ def process_frame(self, frame_num: int, frame_img: numpy.ndarray) -> List[int]:

         return cut_list
 
-    def _quantize_frame(self, frame_img, bits):
-        """Quantizes the image based on the number of most significant figures to be preserved.
-
-        Arguments:
-            frame_img: The 8-bit rgb image of the frame being analyzed.
-            bits: The number of most significant bits to keep during quantization.
-
-        Returns:
-            [red_img, green_img, blue_img]:
-                The three separated color channels of the frame image that have been quantized.
-        """
-        # First, find the value of the number of most significant bits, padding with zeroes
-        bit_value = int(bin(2**bits - 1).ljust(10, '0'), 2)
-
-        # Separate R, G, and B color channels and cast to int for easier bitwise operations
-        red_img = frame_img[:, :, 0].astype(int)
-        green_img = frame_img[:, :, 1].astype(int)
-        blue_img = frame_img[:, :, 2].astype(int)
-
-        # Quantize the frame images
-        red_img = red_img & bit_value
-        green_img = green_img & bit_value
-        blue_img = blue_img & bit_value
-
-        return [red_img, green_img, blue_img]
-
-    def _shift_bits(self, quantized_imgs, bits):
-        """Takes care of the bit shifting operations to combine the RGB color
-        channels into a single array.
-
-        Arguments:
-            quantized_imgs: A list of the three quantized images of the RGB color channels
-                respectively.
-            bits: The number of most significant bits to use for quantizing the image.
-
-        Returns:
-            composite_img: The resulting array after all bitwise operations.
-        """
-        # First, figure out how much each shift needs to be
-        blue_shift = 8 - bits
-        green_shift = 8 - 2 * bits
-        red_shift = 8 - 3 * bits
-
-        # Separate our color channels for ease
-        red_img = quantized_imgs[0]
-        green_img = quantized_imgs[1]
-        blue_img = quantized_imgs[2]
-
-        # Perform the bit shifting for each color
-        red_img = self._shift_images(img=red_img, img_shift=red_shift)
-        green_img = self._shift_images(img=green_img, img_shift=green_shift)
-        blue_img = self._shift_images(img=blue_img, img_shift=blue_shift)
-
-        # Join our rgb arrays together
-        composite_img = numpy.bitwise_or(red_img, numpy.bitwise_or(green_img, blue_img))
-
-        return composite_img
-
-    def _shift_images(self, img, img_shift):
-        """Do bitwise shifting operations for a color channel image checking for shift direction.
-
-        Arguments:
-            img: A quantized image of a single color channel
-            img_shift: How many bits to shift the values of img. If the value is negative, the shift
-                direction is to the left and 8 is added to make it a positive value.
-
-        Returns:
-            shifted_img: The bitwise shifted image.
-        """
-        if img_shift < 0:
-            img_shift += 8
-            shifted_img = numpy.left_shift(img, img_shift)
-        else:
-            shifted_img = numpy.right_shift(img, img_shift)
-
-        return shifted_img
+    def calculate_histogram(self,
+                            frame_img: numpy.ndarray,
+                            bins: int = 256,
+                            normalize: bool = True) -> numpy.ndarray:
+        """Calculates and optionally normalizes the histogram of the luma (Y) channel of an
+        image converted from BGR to YUV color space.
+
+        This function extracts the Y channel from the given BGR image, computes its histogram
+        with the specified number of bins, and optionally normalizes the result (using
+        cv2.normalize's default L2 norm) so histograms can be compared consistently.
+
+        Arguments:
+            frame_img: The input image in BGR color space, assumed to have shape
+                (height, width, 3) where the last dimension represents the BGR channels.
+            bins: The number of bins to use for the histogram (default 256).
+            normalize: Whether to normalize the computed histogram (default True).
+
+        Returns:
+            A 1D numpy array of length `bins` representing the histogram of the luma channel.
+            Each element is the count (or, if normalized, the relative weight) of a particular
+            luma value in the image.
+
+        Examples:
+            >>> img = cv2.imread('path_to_image.jpg')
+            >>> hist = HistogramDetector().calculate_histogram(img, bins=256, normalize=True)
+            >>> print(hist.shape)
+            (256,)
+        """
+        # Extract the luma channel from the frame image
+        y, _, _ = cv2.split(cv2.cvtColor(frame_img, cv2.COLOR_BGR2YUV))

[Review thread on the cv2.split() line above]

Breakthrough (Owner): Is there a reason to use YUV over something else, say HSV/HSL? I wonder if it's computationally cheap enough that we do something similar to ContentDetector, and calculate hue/saturation histograms in addition to luma. That might improve robustness in certain situations. Thoughts?

Author (Contributor): The idea is that the Y channel is not much affected by changes in lighting conditions, making it a bit more robust than RGB or HSV. The Y channel holds perceptually important information, and the YCbCr color space is very similar to how our eyes perceive light. The color conversion is also computationally lightweight, since it is a simple matrix operation.

Breakthrough (Owner), Apr 23, 2024: I was unaware that the Y channel isn't as affected by changes in lighting (Edit: how does that compare with HSL?). Good to know; that may have an impact on another issue in a related project. The last thing I would like to check, then: is there any value in allowing both color and this luma-only method? Aside from perception of brightness, it's possible that users might require color information as well. If that's the case, we can extend this by allowing the target colorspace to be chosen, and weights placed on each channel. That can certainly be done as a follow-up however, so feel free to mark this as resolved.

+        # Create the histogram over the full 8-bit luma range
+        hist = cv2.calcHist([y], [0], None, [bins], [0, 256])
+
+        if normalize:
+            # Normalize the histogram
+            hist = cv2.normalize(hist, hist).flatten()
+
+        return hist

     def is_processing_required(self, frame_num: int) -> bool:
         return True
2 changes: 1 addition & 1 deletion website/pages/api.md
@@ -27,7 +27,7 @@ The threshold-based scene detector (`detect-threshold`) is how most traditional

## Histogram Detector

-The color histogram detector uses color information to detect fast cuts. The input video for this detector must be in 8-bit color. The detection algorithm consists of separating the three RGB color channels and then quantizing them by eliminating all but the given number of most significant bits (`--bits/-b`). The resulting quantized color channels are then bit shifted and joined together into a new, composite image. A histogram is then constructed from the pixel values in the new, composite image. This histogram is compared element-wise with the histogram from the previous frame and if the total difference between the two adjacent histograms exceeds the given threshold (`--threshold/-t`), then a new scene is triggered.
+The histogram detector uses histograms of the Y channel in the YCbCr color space to detect scene changes, which helps mitigate issues caused by lighting variations. Each frame is converted from its original color space to YCbCr, and the Y (luminance) channel is extracted so that the comparison focuses on intensity rather than color. A histogram of the Y channel is computed using the specified number of bins (`--bins/-b`), then normalized so it can be compared consistently with histograms from other frames. The normalized histogram of the current frame is compared with that of the previous frame using the correlation method (`cv2.HISTCMP_CORREL`). A scene change is detected when the correlation between the histograms of consecutive frames falls below the given threshold (`--threshold/-t`), indicating a significant change in luminance.
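For reference, the correlation measure used here is OpenCV's standard `HISTCMP_CORREL` metric, where `N` is the number of bins:

```latex
d(H_1, H_2) = \frac{\sum_I (H_1(I) - \bar{H}_1)(H_2(I) - \bar{H}_2)}
                   {\sqrt{\sum_I (H_1(I) - \bar{H}_1)^2 \, \sum_I (H_2(I) - \bar{H}_2)^2}},
\qquad \bar{H}_k = \frac{1}{N} \sum_J H_k(J)
```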

## Perceptual Hash Detector
