From 4f7507391d525a5ebbebfa572ea62c3121721cee Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Mar 2026 22:52:51 +0000 Subject: [PATCH 1/9] Initial plan From cd7eb24c4d82b1cd6d237d8b0426ed3212b5b25e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Mar 2026 23:00:43 +0000 Subject: [PATCH 2/9] Add --skip-duplicates flag to wp media import command Co-authored-by: swissspidy <841956+swissspidy@users.noreply.github.com> --- features/media-import.feature | 66 +++++++++++++++++++++++++++++++++++ src/Media_Command.php | 62 ++++++++++++++++++++++++++++++-- 2 files changed, 126 insertions(+), 2 deletions(-) diff --git a/features/media-import.feature b/features/media-import.feature index 8e75fa95..f5b68ac1 100644 --- a/features/media-import.feature +++ b/features/media-import.feature @@ -318,3 +318,69 @@ Feature: Manage WordPress attachments """ /foo/large-image.jpg """ + + Scenario: Skip importing a local file that was already imported + Given download: + | path | url | + | {CACHE_DIR}/large-image.jpg | http://wp-cli.org/behat-data/large-image.jpg | + + When I run `wp media import {CACHE_DIR}/large-image.jpg --porcelain` + Then save STDOUT as {ATTACHMENT_ID} + And STDOUT should not be empty + + When I run `wp media import {CACHE_DIR}/large-image.jpg --skip-duplicates` + Then STDOUT should contain: + """ + Skipped importing file + """ + And STDOUT should contain: + """ + already exists as attachment ID {ATTACHMENT_ID} + """ + And STDOUT should contain: + """ + Success: Imported 0 of 1 items (1 skipped). + """ + And the return code should be 0 + + Scenario: Skip importing a remote file that was already imported + When I run `wp media import 'http://wp-cli.org/behat-data/codeispoetry.png' --porcelain` + Then save STDOUT as {ATTACHMENT_ID} + And STDOUT should not be empty + + When I run `wp media import 'http://wp-cli.org/behat-data/codeispoetry.png' --skip-duplicates` + Then STDOUT should contain: + """ + Skipped importing file + """ + And STDOUT should contain: + """ + already exists as attachment ID {ATTACHMENT_ID} + """ + And STDOUT should contain: + """ + Success: Imported 0 of 1 items (1 skipped). + """ + And the return code should be 0 + + Scenario: Import new file while skipping duplicates from a batch + Given download: + | path | url | + | {CACHE_DIR}/large-image.jpg | http://wp-cli.org/behat-data/large-image.jpg | + + When I run `wp media import {CACHE_DIR}/large-image.jpg` + Then STDOUT should contain: + """ + Success: Imported 1 of 1 items. + """ + + When I run `wp media import {CACHE_DIR}/large-image.jpg 'http://wp-cli.org/behat-data/codeispoetry.png' --skip-duplicates` + Then STDOUT should contain: + """ + Skipped importing file + """ + And STDOUT should contain: + """ + Success: Imported 1 of 2 items (1 skipped). + """ + And the return code should be 0 diff --git a/src/Media_Command.php b/src/Media_Command.php index 1fc15663..2ed5cff5 100644 --- a/src/Media_Command.php +++ b/src/Media_Command.php @@ -270,6 +270,9 @@ public function regenerate( $args, $assoc_args = array() ) { * [--featured_image] * : If set, set the imported image as the Featured Image of the post it is attached to. * + * [--skip-duplicates] + * : If set, media files that have already been imported will be skipped. + * * [--porcelain[=]] * : Output a single field for each imported image. Defaults to attachment ID when used as flag. * --- @@ -308,7 +311,7 @@ public function regenerate( $args, $assoc_args = array() ) { * http://wordpress-develop.dev/wp-header-logo/ * * @param string[] $args Positional arguments. - * @param array{post_id?: string, post_name?: string, file_name?: string, title?: string, caption?: string, alt?: string, desc?: string, 'skip-copy'?: bool, 'destination-dir'?: string, 'preserve-filetime'?: bool, featured_image?: bool, porcelain?: bool|string} $assoc_args Associative arguments. + * @param array{post_id?: string, post_name?: string, file_name?: string, title?: string, caption?: string, alt?: string, desc?: string, 'skip-copy'?: bool, 'destination-dir'?: string, 'preserve-filetime'?: bool, featured_image?: bool, 'skip-duplicates'?: bool, porcelain?: bool|string} $assoc_args Associative arguments. * @return void */ public function import( $args, $assoc_args = array() ) { @@ -361,6 +364,7 @@ public function import( $args, $assoc_args = array() ) { $number = 0; $successes = 0; $errors = 0; + $skips = 0; foreach ( $args as $file ) { ++$number; if ( 0 === $number % self::WP_CLEAR_OBJECT_CACHE_INTERVAL ) { @@ -379,6 +383,16 @@ public function import( $args, $assoc_args = array() ) { ++$errors; continue; } + if ( Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ) { + $existing = $this->find_duplicate_attachment( Utils\basename( $file ), false ); + if ( false !== $existing ) { + if ( ! $porcelain ) { + WP_CLI::log( "Skipped importing file '$orig_filename'. Reason: already exists as attachment ID $existing." ); + } + ++$skips; + continue; + } + } if ( Utils\get_flag_value( $assoc_args, 'skip-copy' ) ) { $tempfile = $file; } else { @@ -390,6 +404,16 @@ public function import( $args, $assoc_args = array() ) { $file_time = @filemtime( $file ); } } else { + if ( Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ) { + $existing = $this->find_duplicate_attachment( $file, true ); + if ( false !== $existing ) { + if ( ! $porcelain ) { + WP_CLI::log( "Skipped importing file '$orig_filename'. Reason: already exists as attachment ID $existing." ); + } + ++$skips; + continue; + } + } $tempfile = download_url( $file ); if ( is_wp_error( $tempfile ) ) { WP_CLI::warning( @@ -542,7 +566,7 @@ public function import( $args, $assoc_args = array() ) { // Report the result of the operation if ( ! Utils\get_flag_value( $assoc_args, 'porcelain' ) ) { - Utils\report_batch_operation_results( $noun, 'import', count( $args ), $successes, $errors ); + Utils\report_batch_operation_results( $noun, 'import', count( $args ), $successes, $errors, $skips ); } elseif ( $errors ) { WP_CLI::halt( 1 ); } @@ -692,6 +716,40 @@ private function make_copy( $path ) { return $filename; } + /** + * Finds an existing attachment by filename or source URL. + * + * For local files, matches against the basename of the `_wp_attached_file` meta value. + * This will match the first attachment found when multiple files share the same basename + * in different upload subdirectories. + * + * @param string $file_or_name Basename of the local file, or full URL for remote files. + * @param bool $is_remote Whether to search by source URL (remote) or by filename (local). + * @return int|false Attachment ID if found, false otherwise. + */ + private function find_duplicate_attachment( $file_or_name, $is_remote ) { + global $wpdb; + + if ( $is_remote ) { + $result = $wpdb->get_var( + $wpdb->prepare( + "SELECT post_id FROM {$wpdb->postmeta} WHERE meta_key = '_source_url' AND meta_value = %s LIMIT 1", + $file_or_name + ) + ); + } else { + $result = $wpdb->get_var( + $wpdb->prepare( + "SELECT post_id FROM {$wpdb->postmeta} WHERE meta_key = '_wp_attached_file' AND (meta_value = %s OR meta_value LIKE %s) LIMIT 1", + $file_or_name, + '%/' . $wpdb->esc_like( $file_or_name ) + ) + ); + } + + return $result ? (int) $result : false; + } + /** * Returns a human-readable description for one or more image size names. * From 4a20e61f24141b5e75b0314fa6716d2a0555e197 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 20 Mar 2026 07:55:42 +0000 Subject: [PATCH 3/9] Fix skip-duplicates: use RIGHT() for detection, restore error messages for non-skip-duplicates paths Co-authored-by: swissspidy <841956+swissspidy@users.noreply.github.com> --- src/Media_Command.php | 46 +++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/src/Media_Command.php b/src/Media_Command.php index 2ed5cff5..a0d8e3c4 100644 --- a/src/Media_Command.php +++ b/src/Media_Command.php @@ -384,7 +384,7 @@ public function import( $args, $assoc_args = array() ) { continue; } if ( Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ) { - $existing = $this->find_duplicate_attachment( Utils\basename( $file ), false ); + $existing = $this->find_duplicate_attachment( Utils\basename( $file ) ); if ( false !== $existing ) { if ( ! $porcelain ) { WP_CLI::log( "Skipped importing file '$orig_filename'. Reason: already exists as attachment ID $existing." ); @@ -405,7 +405,7 @@ public function import( $args, $assoc_args = array() ) { } } else { if ( Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ) { - $existing = $this->find_duplicate_attachment( $file, true ); + $existing = $this->find_duplicate_attachment( (string) explode( '?', Utils\basename( $file ), 2 )[0] ); if ( false !== $existing ) { if ( ! $porcelain ) { WP_CLI::log( "Skipped importing file '$orig_filename'. Reason: already exists as attachment ID $existing." ); @@ -566,7 +566,7 @@ public function import( $args, $assoc_args = array() ) { // Report the result of the operation if ( ! Utils\get_flag_value( $assoc_args, 'porcelain' ) ) { - Utils\report_batch_operation_results( $noun, 'import', count( $args ), $successes, $errors, $skips ); + Utils\report_batch_operation_results( $noun, 'import', count( $args ), $successes, $errors, Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ? $skips : null ); } elseif ( $errors ) { WP_CLI::halt( 1 ); } @@ -717,35 +717,29 @@ private function make_copy( $path ) { } /** - * Finds an existing attachment by filename or source URL. + * Finds an existing attachment whose basename matches the given filename. * - * For local files, matches against the basename of the `_wp_attached_file` meta value. - * This will match the first attachment found when multiple files share the same basename - * in different upload subdirectories. + * Searches the `_wp_attached_file` post meta, which stores the path relative to + * the uploads directory (e.g. '2026/03/image.jpg' or just 'image.jpg'). Matches + * the first attachment found when multiple files share the same basename across + * different upload subdirectories. * - * @param string $file_or_name Basename of the local file, or full URL for remote files. - * @param bool $is_remote Whether to search by source URL (remote) or by filename (local). + * @param string $basename Filename basename to search for (e.g. 'image.jpg'). * @return int|false Attachment ID if found, false otherwise. */ - private function find_duplicate_attachment( $file_or_name, $is_remote ) { + private function find_duplicate_attachment( $basename ) { global $wpdb; - if ( $is_remote ) { - $result = $wpdb->get_var( - $wpdb->prepare( - "SELECT post_id FROM {$wpdb->postmeta} WHERE meta_key = '_source_url' AND meta_value = %s LIMIT 1", - $file_or_name - ) - ); - } else { - $result = $wpdb->get_var( - $wpdb->prepare( - "SELECT post_id FROM {$wpdb->postmeta} WHERE meta_key = '_wp_attached_file' AND (meta_value = %s OR meta_value LIKE %s) LIMIT 1", - $file_or_name, - '%/' . $wpdb->esc_like( $file_or_name ) - ) - ); - } + $slash_basename = '/' . $basename; + + $result = $wpdb->get_var( + $wpdb->prepare( + "SELECT post_id FROM {$wpdb->postmeta} WHERE meta_key = '_wp_attached_file' AND (meta_value = %s OR RIGHT(meta_value, %d) = %s) LIMIT 1", + $basename, + mb_strlen( $slash_basename, 'UTF-8' ), + $slash_basename + ) + ); return $result ? (int) $result : false; } From 5bdea55b36cc5832085199f26a25ed543c8f100a Mon Sep 17 00:00:00 2001 From: Pascal Birchler Date: Fri, 20 Mar 2026 09:09:12 +0100 Subject: [PATCH 4/9] Update src/Media_Command.php Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/Media_Command.php | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Media_Command.php b/src/Media_Command.php index a0d8e3c4..ffe98e6e 100644 --- a/src/Media_Command.php +++ b/src/Media_Command.php @@ -732,11 +732,17 @@ private function find_duplicate_attachment( $basename ) { $slash_basename = '/' . $basename; + if ( function_exists( 'mb_strlen' ) ) { + $slash_basename_length = mb_strlen( $slash_basename, 'UTF-8' ); + } else { + $slash_basename_length = strlen( $slash_basename ); + } + $result = $wpdb->get_var( $wpdb->prepare( "SELECT post_id FROM {$wpdb->postmeta} WHERE meta_key = '_wp_attached_file' AND (meta_value = %s OR RIGHT(meta_value, %d) = %s) LIMIT 1", $basename, - mb_strlen( $slash_basename, 'UTF-8' ), + $slash_basename_length, $slash_basename ) ); From 551634ec197d82fcd0971394eeebbbcd896fb803 Mon Sep 17 00:00:00 2001 From: Pascal Birchler Date: Fri, 20 Mar 2026 09:09:23 +0100 Subject: [PATCH 5/9] Update src/Media_Command.php Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/Media_Command.php | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Media_Command.php b/src/Media_Command.php index ffe98e6e..cf761587 100644 --- a/src/Media_Command.php +++ b/src/Media_Command.php @@ -740,7 +740,15 @@ private function find_duplicate_attachment( $basename ) { $result = $wpdb->get_var( $wpdb->prepare( - "SELECT post_id FROM {$wpdb->postmeta} WHERE meta_key = '_wp_attached_file' AND (meta_value = %s OR RIGHT(meta_value, %d) = %s) LIMIT 1", + "SELECT p.ID + FROM {$wpdb->posts} p + INNER JOIN {$wpdb->postmeta} pm + ON p.ID = pm.post_id + WHERE p.post_type = 'attachment' + AND p.post_status != 'trash' + AND pm.meta_key = '_wp_attached_file' + AND ( pm.meta_value = %s OR RIGHT(pm.meta_value, %d) = %s ) + LIMIT 1", $basename, $slash_basename_length, $slash_basename From ae7042540de93c3950dc59a4ca5ee08825db2611 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 20 Mar 2026 08:11:25 +0000 Subject: [PATCH 6/9] Use explode() consistently for URL query string stripping in remote file handling Co-authored-by: swissspidy <841956+swissspidy@users.noreply.github.com> --- src/Media_Command.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Media_Command.php b/src/Media_Command.php index cf761587..12c4cafb 100644 --- a/src/Media_Command.php +++ b/src/Media_Command.php @@ -426,7 +426,7 @@ public function import( $args, $assoc_args = array() ) { ++$errors; continue; } - $name = (string) strtok( Utils\basename( $file ), '?' ); + $name = (string) explode( '?', Utils\basename( $file ), 2 )[0]; } if ( ! empty( $assoc_args['file_name'] ) ) { From 04b6bdcae7d59541cb27720fc5b87c54899d7039 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 20 Mar 2026 09:18:52 +0000 Subject: [PATCH 7/9] Fix duplicate detection for WP 5.3+ scaled images via _wp_original_image_file Co-authored-by: swissspidy <841956+swissspidy@users.noreply.github.com> --- src/Media_Command.php | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Media_Command.php b/src/Media_Command.php index 12c4cafb..5269c16f 100644 --- a/src/Media_Command.php +++ b/src/Media_Command.php @@ -720,9 +720,11 @@ private function make_copy( $path ) { * Finds an existing attachment whose basename matches the given filename. * * Searches the `_wp_attached_file` post meta, which stores the path relative to - * the uploads directory (e.g. '2026/03/image.jpg' or just 'image.jpg'). Matches - * the first attachment found when multiple files share the same basename across - * different upload subdirectories. + * the uploads directory (e.g. '2026/03/image.jpg' or just 'image.jpg'). Also + * checks `_wp_original_image_file` (absolute path, WP 5.3+) to handle images + * that were scaled down on upload (stored as 'image-scaled.jpg') but whose + * original filename is still 'image.jpg'. Matches the first attachment found + * when multiple files share the same basename across different subdirectories. * * @param string $basename Filename basename to search for (e.g. 'image.jpg'). * @return int|false Attachment ID if found, false otherwise. @@ -746,7 +748,7 @@ private function find_duplicate_attachment( $basename ) { ON p.ID = pm.post_id WHERE p.post_type = 'attachment' AND p.post_status != 'trash' - AND pm.meta_key = '_wp_attached_file' + AND pm.meta_key IN ('_wp_attached_file', '_wp_original_image_file') AND ( pm.meta_value = %s OR RIGHT(pm.meta_value, %d) = %s ) LIMIT 1", $basename, From e94bf00756a158a577f4f28cb6db0ce215c06813 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 20 Mar 2026 09:58:30 +0000 Subject: [PATCH 8/9] Fix duplicate detection for WP 5.3+ scaled images via -scaled basename variant Co-authored-by: swissspidy <841956+swissspidy@users.noreply.github.com> --- src/Media_Command.php | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/src/Media_Command.php b/src/Media_Command.php index 5269c16f..507c239d 100644 --- a/src/Media_Command.php +++ b/src/Media_Command.php @@ -721,10 +721,10 @@ private function make_copy( $path ) { * * Searches the `_wp_attached_file` post meta, which stores the path relative to * the uploads directory (e.g. '2026/03/image.jpg' or just 'image.jpg'). Also - * checks `_wp_original_image_file` (absolute path, WP 5.3+) to handle images - * that were scaled down on upload (stored as 'image-scaled.jpg') but whose - * original filename is still 'image.jpg'. Matches the first attachment found - * when multiple files share the same basename across different subdirectories. + * checks for the WP 5.3+ big-image scaled variant (e.g. 'image-scaled.jpg') so + * that re-importing a large file that was scaled on first import is correctly + * detected as a duplicate. Matches the first attachment found when multiple files + * share the same basename across different upload subdirectories. * * @param string $basename Filename basename to search for (e.g. 'image.jpg'). * @return int|false Attachment ID if found, false otherwise. @@ -732,12 +732,21 @@ private function make_copy( $path ) { private function find_duplicate_attachment( $basename ) { global $wpdb; - $slash_basename = '/' . $basename; + // WP 5.3+ big-image scaling renames 'image.jpg' → 'image-scaled.jpg' and + // stores the scaled name in _wp_attached_file, so search for both variants. + $ext = pathinfo( $basename, PATHINFO_EXTENSION ); + $name = pathinfo( $basename, PATHINFO_FILENAME ); + $scaled_basename = $name . '-scaled' . ( $ext ? '.' . $ext : '' ); + + $slash_basename = '/' . $basename; + $slash_scaled_basename = '/' . $scaled_basename; if ( function_exists( 'mb_strlen' ) ) { - $slash_basename_length = mb_strlen( $slash_basename, 'UTF-8' ); + $slash_basename_length = mb_strlen( $slash_basename, 'UTF-8' ); + $slash_scaled_basename_length = mb_strlen( $slash_scaled_basename, 'UTF-8' ); } else { - $slash_basename_length = strlen( $slash_basename ); + $slash_basename_length = strlen( $slash_basename ); + $slash_scaled_basename_length = strlen( $slash_scaled_basename ); } $result = $wpdb->get_var( @@ -748,12 +757,20 @@ private function find_duplicate_attachment( $basename ) { ON p.ID = pm.post_id WHERE p.post_type = 'attachment' AND p.post_status != 'trash' - AND pm.meta_key IN ('_wp_attached_file', '_wp_original_image_file') - AND ( pm.meta_value = %s OR RIGHT(pm.meta_value, %d) = %s ) + AND pm.meta_key = '_wp_attached_file' + AND ( + pm.meta_value = %s + OR RIGHT(pm.meta_value, %d) = %s + OR pm.meta_value = %s + OR RIGHT(pm.meta_value, %d) = %s + ) LIMIT 1", $basename, $slash_basename_length, - $slash_basename + $slash_basename, + $scaled_basename, + $slash_scaled_basename_length, + $slash_scaled_basename ) ); From 1cac237c290441fe7c55488672c5aee19f11802a Mon Sep 17 00:00:00 2001 From: Pascal Birchler Date: Sat, 21 Mar 2026 00:10:12 +0100 Subject: [PATCH 9/9] Lint fix --- src/Media_Command.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Media_Command.php b/src/Media_Command.php index c08298dc..bccadc84 100644 --- a/src/Media_Command.php +++ b/src/Media_Command.php @@ -530,7 +530,7 @@ public function import( $args, $assoc_args = array() ) { continue; } if ( Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ) { - $existing = $this->find_duplicate_attachment( Utils\basename( $file ) ); + $existing = $this->find_duplicate_attachment( Path::basename( $file ) ); if ( false !== $existing ) { if ( ! $porcelain ) { WP_CLI::log( "Skipped importing file '$orig_filename'. Reason: already exists as attachment ID $existing." ); @@ -551,7 +551,7 @@ public function import( $args, $assoc_args = array() ) { } } else { if ( Utils\get_flag_value( $assoc_args, 'skip-duplicates' ) ) { - $existing = $this->find_duplicate_attachment( (string) explode( '?', Utils\basename( $file ), 2 )[0] ); + $existing = $this->find_duplicate_attachment( (string) explode( '?', Path::basename( $file ), 2 )[0] ); if ( false !== $existing ) { if ( ! $porcelain ) { WP_CLI::log( "Skipped importing file '$orig_filename'. Reason: already exists as attachment ID $existing." ); @@ -572,7 +572,7 @@ public function import( $args, $assoc_args = array() ) { ++$errors; continue; } - $name = (string) explode( '?', Path::basename( $file ), 2 )[0]; + $name = (string) explode( '?', Path::basename( $file ), 2 )[0]; } if ( ! empty( $assoc_args['file_name'] ) ) {