File: /var/www/html/wp-content/plugins/media-cleaner/classes/engine.php
<?php
class Meow_WPMC_Engine {
private $core;
private $admin;
/**
 * Stores the two collaborators passed in.
 *
 * @param mixed $core  Kept in $this->core; the methods of this class call it for options, logging, references and issues.
 * @param mixed $admin Kept in $this->admin; not read by any method of this class.
 */
function __construct( $core, $admin ) {
    $this->admin = $admin;
    $this->core  = $core;
}
/*
STEP 1: Parse the content, and look for references
*/
/**
 * Returns the IDs of the posts whose content should be checked for references.
 *
 * Posts with the statuses 'inherit', 'trash' and 'auto-draft' are left out, as are
 * the post types listed (or matched by the LIKE patterns) in the query below.
 *
 * @param int $offset Offset of the first row; a negative number means no limit
 * @param int $size   Maximum number of rows; a negative number means no limit
 * @return array Post IDs, as returned by $wpdb->get_col()
 */
function get_posts_to_check( $offset = -1, $size = -1 ) {
    global $wpdb;
    // Maybe we could avoid to check more post_types.
    // SELECT post_type, COUNT(*) FROM `wp_posts` GROUP BY post_type
    $q = <<<SQL
SELECT p.ID FROM $wpdb->posts p
WHERE p.post_status NOT IN ('inherit', 'trash', 'auto-draft')
AND p.post_type NOT IN ('attachment', 'shop_order', 'shop_order_refund', 'nav_menu_item', 'revision', 'auto-draft', 'wphb_minify_group', 'customize_changeset', 'oembed_cache', 'nf_sub', 'jp_img_sitemap')
AND p.post_type NOT LIKE 'dlssus_%'
AND p.post_type NOT LIKE 'ml-slide%'
AND p.post_type NOT LIKE '%acf-%'
AND p.post_type NOT LIKE '%edd_%'
SQL;
    if ( $offset >= 0 && $size >= 0 ) {
        // Only the LIMIT clause goes through prepare(): the query above contains literal
        // '%' LIKE wildcards, and prepare() expects literal percent signs to be written
        // as '%%'. Preparing the clause alone keeps the wildcards out of its way.
        $q .= $wpdb->prepare( " LIMIT %d, %d", $offset, $size );
    }
    return $wpdb->get_col( $q );
}
/**
 * Counts the posts to check without loading their IDs
 * (memory-efficient alternative to count(get_posts_to_check())).
 *
 * The WHERE clause is the same as the one used by get_posts_to_check().
 *
 * @return int
 */
function count_posts_to_check() {
    global $wpdb;
    $count_sql = <<<SQL
SELECT COUNT(p.ID) FROM $wpdb->posts p
WHERE p.post_status NOT IN ('inherit', 'trash', 'auto-draft')
AND p.post_type NOT IN ('attachment', 'shop_order', 'shop_order_refund', 'nav_menu_item', 'revision', 'auto-draft', 'wphb_minify_group', 'customize_changeset', 'oembed_cache', 'nf_sub', 'jp_img_sitemap')
AND p.post_type NOT LIKE 'dlssus_%'
AND p.post_type NOT LIKE 'ml-slide%'
AND p.post_type NOT LIKE '%acf-%'
AND p.post_type NOT LIKE '%edd_%'
SQL;
    $total = $wpdb->get_var( $count_sql );
    return (int) $total;
}
/**
 * Parses a page of posts for references (based on $limit and $limitsize for paging the scan).
 *
 * On the first page (empty $limit) the stored issues, references and progress are reset,
 * and the widgets plus the 'wpmc_scan_widgets' and 'wpmc_scan_once' hooks are run.
 *
 * @param int      $limit     Offset of the first post of this page
 * @param int      $limitsize Number of posts per page
 * @param string   $message   Receives a human-readable summary (passed by reference)
 * @param int|null $post_id   When not null, only this post is scanned instead of a page
 * @return bool True when the step was skipped or fewer than $limitsize posts were
 *              processed (last page), false otherwise
 */
function extractRefsFromContent( $limit, $limitsize, &$message = '', $post_id = null ) {
    $is_first_page = empty( $limit );
    if ( $is_first_page ) {
        $this->core->reset_issues();
        $this->core->reset_references();
        $this->core->reset_progress( );
    }

    $method = $this->core->current_method;

    // Check content is a different option depending on the method
    $check_content = false;
    if ( $method === 'media' ) {
        $check_content = $this->core->get_option( 'content' );
    }
    else if ( $method === 'files' ) {
        $check_content = $this->core->get_option( 'filesystem_content' );
    }

    if ( ( $method == 'media' || $method == 'files' ) && !$check_content ) {
        $message = __( "Skipped, as Content is not selected.", 'media-cleaner' );
        return true;
    }

    // Initialize the parsers
    do_action( 'wpmc_initialize_parsers' );

    $posts = $post_id !== null ? [ $post_id ] : $this->get_posts_to_check( $limit, $limitsize );

    // Only at the beginning, check the Widgets and the Scan Once in the Parsers
    if ( $is_first_page ) {
        $this->core->log( "🏁 Extracting refs from content..." );

        global $wp_registered_widgets;
        $syswidgets = is_array( $wp_registered_widgets ) ? $wp_registered_widgets : [];
        $active_widgets = get_option( 'sidebars_widgets' );

        // The option may be missing (get_option() then returns false): nothing to iterate.
        if ( is_array( $active_widgets ) ) {
            foreach ( $active_widgets as $sidebar_name => $widgets ) {
                if ( $sidebar_name == 'wp_inactive_widgets' || empty( $widgets ) || !is_array( $widgets ) ) {
                    continue;
                }
                foreach ( $widgets as $widget ) {
                    // A sidebar can still list a widget that is no longer registered;
                    // there is nothing to hand over to the parsers in that case.
                    $is_valid_key = is_string( $widget ) || is_int( $widget );
                    if ( !$is_valid_key || !isset( $syswidgets[$widget] ) ) {
                        continue;
                    }
                    do_action( 'wpmc_scan_widget', $syswidgets[$widget] );
                }
            }
        }

        do_action( 'wpmc_scan_widgets' );
        do_action( 'wpmc_scan_once' );
    }

    $this->core->timeout_check_start( count( $posts ) );
    $is_debug = $this->core->is_debug();

    foreach ( $posts as $post ) {
        $this->core->timeout_check();

        // Debug logging for timeout detection
        if ( $is_debug ) {
            $post_obj = get_post( $post );
            $post_type = $post_obj ? $post_obj->post_type : 'unknown';
            $post_title = $post_obj ? substr( $post_obj->post_title, 0, 50 ) : 'no title';
            $start_time = microtime( true );
            $this->core->log( "🔍 Processing post ID: $post | Type: $post_type | Title: $post_title" );
        }

        // Check content
        if ( $check_content ) {
            do_action( 'wpmc_scan_postmeta', $post );
            $html = get_post_field( 'post_content', $post );
            do_action( 'wpmc_scan_post', $html, $post );
        }

        // Extra scanning methods
        // do_action( 'wpmc_scan_extra', $post );

        if ( $is_debug ) {
            $elapsed_ms = round( ( microtime( true ) - $start_time ) * 1000, 2 );
            $this->core->log( "✓ Completed post ID: $post in {$elapsed_ms}ms" );
        }

        $this->core->timeout_check_additem();
    }

    // Write the references found (and cached) by the parsers
    $this->core->write_references();

    $this->core->save_progress( 'extractReferencesFromContent', array(
        'type' => 'content',
        'limit' => $limit,
        'limitSize' => $limitsize
    ) );

    // A page shorter than the requested size means there is nothing left to fetch.
    $finished = count( $posts ) < $limitsize;
    if ( $finished )
    {
        $this->core->log();
        $this->core->save_progress( 'extractReferencesFromContent_finished' );
    }

    $elapsed = $this->core->timeout_get_elapsed();
    $message = sprintf(
        // translators: %1$d is number of posts, %2$s is time in milliseconds
        __( "Extracted references from %1\$d posts in %2\$s.", 'media-cleaner' ), count( $posts ), $elapsed
    );
    return $finished;
}
/**
 * Registers the thumbnails of a page of media entries as references, generating
 * the ones that are missing for the currently registered image sizes.
 *
 * Sizes listed in the 'thumbnail_force_issues' option are recorded as issues
 * ('FORCED_THUMBNAIL_ISSUE') instead of references.
 *
 * @param int $limit     Offset of the first media entry of this page
 * @param int $limitsize Number of media entries per page
 * @return bool True when fewer than $limitsize entries were processed (last page)
 */
function extractRefsFromThumbnails( $limit, $limitsize ) {
    $medias = $this->get_media_entries( $limit, $limitsize, false );
    require_once ABSPATH . 'wp-admin/includes/image.php';

    // Get the sizes that should be marked as issues
    $force_issue_sizes = $this->core->get_option( 'thumbnail_force_issues' );
    if ( !is_array( $force_issue_sizes ) ) {
        $force_issue_sizes = [];
    }

    // The registered sizes do not depend on the media entry: fetch them once.
    $needed_sizes = wp_get_registered_image_subsizes();

    foreach ( $medias as $media_id ) {
        $file = get_attached_file( $media_id );
        if ( empty( $file ) ) {
            // No attached file: nothing to reference and nothing to generate from.
            continue;
        }
        $directory = dirname( $file );

        // Keep the existing metadata as it is and only make sure 'sizes' is an array.
        // Replacing it wholesale would wipe every other key once it is written back.
        $meta = wp_get_attachment_metadata( $media_id );
        if ( !is_array( $meta ) ) {
            $meta = [];
        }
        if ( !isset( $meta['sizes'] ) || !is_array( $meta['sizes'] ) ) {
            $meta['sizes'] = [];
        }
        $meta_changed = false;

        foreach ( $needed_sizes as $size => $size_data ) {
            $image_path = null;
            $origin = "{OG_THUMB}";

            // Use the thumbnail recorded in the metadata if it is really there (and not empty).
            if ( !empty( $meta['sizes'][ $size ]['file'] ) ) {
                $recorded_path = path_join( $directory, $meta['sizes'][ $size ]['file'] );
                if ( file_exists( $recorded_path ) && filesize( $recorded_path ) > 0 ) {
                    $image_path = $recorded_path;
                }
            }

            // Generate the thumbnail size.
            if ( $image_path === null ) {
                $resized = image_make_intermediate_size( $file, $size_data['width'], $size_data['height'], $size_data['crop'] ?? true );
                if ( !is_array( $resized ) || empty( $resized['file'] ) ) {
                    // No image was created, so there is no file to reference for this size.
                    continue;
                }
                $meta['sizes'][ $size ] = $resized;
                $meta_changed = true;
                // Reference the file that was just generated, not the path computed before.
                $image_path = path_join( $directory, $resized['file'] );
                $origin = "{GEN_THUMB}";
            }

            $image_path = $this->core->clean_url( $image_path );

            // Check if this size should be marked as an issue instead of a reference.
            // Loose comparison on purpose: a numeric size name becomes an integer array key.
            if ( in_array( $size, $force_issue_sizes ) ) {
                $this->core->add_issue( $image_path, 'FORCED_THUMBNAIL_ISSUE', $media_id );
            } else {
                $this->core->add_reference_url(
                    $image_path,
                    $origin . $size,
                    $media_id, ['force_cache' => true ]
                );
            }
        }

        // Only write the metadata back when a thumbnail was actually generated.
        if ( $meta_changed ) {
            wp_update_attachment_metadata( $media_id, $meta );
        }
    }

    $this->core->write_references();

    $this->core->save_progress( 'extractReferencesFromThumbnails', array(
        'type' => 'thumbnails',
        'limit' => $limit,
        'limitSize' => $limitsize
    ) );

    $finished = count( $medias ) < $limitsize;
    if ( $finished )
    {
        $this->core->save_progress( 'extractReferencesFromThumbnails_finished' );
        $this->core->log("Finished extracting refs from Thumbnails.");
    }
    return $finished;
}
/**
 * For each media of the page, hashes its files and adds each hash as a reference
 * (origin type 'HASH:' followed by the MD5 of the file).
 *
 * @param int $limit     Offset of the first media entry of this page
 * @param int $limitsize Number of media entries per page
 * @return bool True when fewer than $limitsize entries were processed (last page)
 */
function extractRefsFromDuplicates( $limit, $limitsize ) {
    $medias = $this->get_media_entries( $limit, $limitsize, false );
    $uploadDir = trailingslashit( $this->core->upload_path );

    foreach ( $medias as $media ) {
        $paths = $this->core->get_paths_from_attachment( $media );
        foreach ( $paths as $path ) {
            $fullPath = $uploadDir . $path;
            if ( !is_file( $fullPath ) || !is_readable( $fullPath ) ) {
                continue;
            }
            $hash = md5_file( $fullPath );
            if ( $hash === false ) {
                // Hashing failed: recording a bare 'HASH:' would make every such file
                // look like a duplicate of the others.
                continue;
            }
            $this->core->add_reference_url($path, 'HASH:' . $hash, null, ['force_cache' => true ]);
        }
    }

    $this->core->write_references();

    $this->core->save_progress( 'extractReferencesFromDuplicates', array(
        'type' => 'duplicates',
        'limit' => $limit,
        'limitSize' => $limitsize
    ) );

    $finished = count( $medias ) < $limitsize;
    if ( $finished )
    {
        $this->core->save_progress( 'extractReferencesFromDuplicates_finished' );
        $this->core->log("Finished extracting refs from Duplicates.");
    }
    return $finished;
}
/**
 * Adds the files of a page of media entries as 'MEDIA LIBRARY' references
 * (paged by $limit and $limitsize).
 *
 * The step is skipped when the current method is 'media' or when the
 * 'media_library' option is off.
 *
 * @param int      $limit     Offset of the first media entry of this page
 * @param int      $limitsize Number of media entries per page
 * @param string   $message   Receives a human-readable summary (passed by reference)
 * @param int|null $post_id   Passed to get_media_entries() as the parent post filter
 * @return bool True when skipped or when fewer than $limitsize entries were processed
 */
function extractRefsFromLibrary( $limit, $limitsize, &$message = '', $post_id = null ) {
    if ( $this->core->current_method == 'media' ) {
        $message = __( "Skipped, as it is not needed for the Media Library method.", 'media-cleaner' );
        return true;
    }

    if ( !$this->core->get_option( 'media_library' ) ) {
        $message = __( "Skipped, as Media Library is not selected.", 'media-cleaner' );
        return true;
    }

    $attachment_ids = $this->get_media_entries( $limit, $limitsize, false, $post_id );

    // The start of the scan is only announced on the first page
    if ( empty( $limit ) ) {
        $this->core->log( "🏁 Extracting refs from Media Library..." );
    }

    $batch_count = count( $attachment_ids );
    $this->core->timeout_check_start( $batch_count );

    foreach ( $attachment_ids as $attachment_id ) {
        $this->core->timeout_check();
        // Every path belonging to the attachment becomes a reference
        $this->core->add_reference_url(
            $this->core->get_paths_from_attachment( $attachment_id ),
            'MEDIA LIBRARY'
        );
        $this->core->timeout_check_additem();
    }

    // Flush the references cached so far
    $this->core->write_references();

    $progress = [
        'type' => 'library',
        'limit' => $limit,
        'limitSize' => $limitsize,
    ];
    $this->core->save_progress( 'extractReferencesFromLibrary', $progress );

    // A page shorter than requested is the last one
    $is_last_page = $batch_count < $limitsize;
    if ( $is_last_page ) {
        $this->core->save_progress( 'extractReferencesFromLibrary_finished' );
        $this->core->log("Finished extracting refs from Media Library.");
    }

    $message = sprintf(
        __( "Extracted references from %d medias in %s.", 'media-cleaner' ),
        $batch_count,
        $this->core->timeout_get_elapsed()
    );
    return $is_last_page;
}
/*
STEP 2: List the media entries (or files)
*/
/**
 * Returns the distinct file hashes recorded in the references table.
 *
 * extractRefsFromDuplicates() stores each hash as an originType starting with 'HASH:'.
 * Only those rows are selected, so any other origin type present in the table is
 * never returned as if it were a hash.
 *
 * @return array Distinct originType values, each still carrying the 'HASH:' prefix
 */
function get_hash_duplicates() {
    // Get the hashes from the references ( unique ones )
    global $wpdb;
    $hash_pattern = $wpdb->esc_like( 'HASH:' ) . '%';
    $hashes = $wpdb->get_col( $wpdb->prepare(
        "SELECT DISTINCT originType FROM {$wpdb->prefix}mclean_refs WHERE originType LIKE %s",
        $hash_pattern
    ) );
    return $hashes;
}
/**
 * Get files in /uploads (if path is null, the root of /uploads is returned).
 *
 * The listing itself is delegated to the 'wpmc_list_uploaded_files' filter.
 *
 * @param string|null $path   Passed through to the filter
 * @param int         $offset Passed through to the filter
 * @param int         $limit  Passed through to the filter
 * @return mixed The filtered value, or an empty array when that value is falsy
 */
function get_files( $path = null, $offset = 0, $limit = -1 ) {
    $listed = apply_filters( 'wpmc_list_uploaded_files', null, $path, $offset, $limit );
    if ( !$listed ) {
        return array();
    }
    return $listed;
}
/**
 * Returns the IDs of the media entries (attachments with the 'inherit' status).
 *
 * When the 'images_only' option is on, only the image mime types listed in the
 * query are kept.
 *
 * @param int $offset Negative number means no limit
 * @param int $size Negative number means no limit
 * @param bool $unattachedOnly Only keep the entries without a parent post
 * @param int|null $post_parent_id Only keep the entries attached to this post; ignored when $unattachedOnly is set
 * @return NULL|array
 */
function get_media_entries( $offset = -1, $size = -1, $unattachedOnly = false, $post_parent_id = null ) {
    global $wpdb;

    // $unattachedOnly takes priority over $post_parent_id
    $parentFilter = '';
    if ( $unattachedOnly ) {
        $parentFilter = "AND p.post_parent = 0";
    } elseif ( $post_parent_id !== null ) {
        $parentFilter = $wpdb->prepare( "AND p.post_parent = %d", $post_parent_id );
    }

    $sql = <<<SQL
SELECT p.ID FROM $wpdb->posts p
WHERE p.post_status = 'inherit'
$parentFilter
AND p.post_type = 'attachment'
SQL;

    if ( $this->core->get_option( 'images_only' ) ) {
        // Restrict the result to image attachments.
        // NOTE(review): SVG uploads are usually stored as 'image/svg+xml' - confirm 'image/svg' is intended.
        $sql .= " AND p.post_mime_type IN ( 'image/jpeg', 'image/gif', 'image/png', 'image/webp',
'image/bmp', 'image/tiff', 'image/x-icon', 'image/svg' )";
    }

    $is_paged = $offset >= 0 && $size >= 0;
    if ( !$is_paged ) {
        // No limit
        return $wpdb->get_col( $sql );
    }

    $sql .= " LIMIT %d, %d";
    return $wpdb->get_col( $wpdb->prepare( $sql, $offset, $size ) );
}
/**
 * Counts the media entries without loading their IDs
 * (memory-efficient alternative to count(get_media_entries())).
 *
 * @param bool $unattachedOnly Only count the entries without a parent post
 * @return int
 */
function count_media_entries( $unattachedOnly = false ) {
    global $wpdb;

    $parentFilter = '';
    if ( $unattachedOnly ) {
        $parentFilter = "AND p.post_parent = 0";
    }

    $count_sql = <<<SQL
SELECT COUNT(p.ID) FROM $wpdb->posts p
WHERE p.post_status = 'inherit'
$parentFilter
AND p.post_type = 'attachment'
SQL;

    $images_only = $this->core->get_option( 'images_only' );
    if ( $images_only ) {
        // NOTE(review): SVG uploads are usually stored as 'image/svg+xml' - confirm 'image/svg' is intended.
        $count_sql .= " AND p.post_mime_type IN ( 'image/jpeg', 'image/gif', 'image/png', 'image/webp',
'image/bmp', 'image/tiff', 'image/x-icon', 'image/svg' )";
    }

    $total = $wpdb->get_var( $count_sql );
    return (int) $total;
}
/*
STEP 3: Check the media entries (or files) against the references
*/
/**
 * Records a 'DUPLICATE' issue for every file sharing the given hash.
 *
 * The references table is searched for rows whose originType matches $hash; when
 * more than one file is found, each of them is inserted into the scan table.
 *
 * @param string $hash Origin type to look for in the references table
 * @return bool False when at most one file carries the hash, true otherwise
 */
function check_duplicates( $hash ) {
    global $wpdb;
    $refs_table = $wpdb->prefix . "mclean_refs";
    $issues_table = $wpdb->prefix . "mclean_scan";

    $lookup = $wpdb->prepare( "SELECT mediaUrl FROM $refs_table WHERE originType LIKE %s", $hash );
    $media_urls = $wpdb->get_col( $lookup );

    // A single file (or none) cannot be a duplicate
    if ( count( $media_urls ) <= 1 ) {
        return false;
    }

    foreach ( $media_urls as $media_url ) {
        $absolute_path = trailingslashit( $this->core->upload_path ) . stripslashes( $media_url );
        $relative_path = $this->core->clean_uploaded_filename( $media_url );

        $bytes = 0;
        if ( file_exists( $absolute_path ) ) {
            $bytes = filesize( $absolute_path );
        }

        // Look for an already recorded issue on the path returned by clean_url_from_resolution()
        $parent_path = $this->core->clean_url_from_resolution( $relative_path );
        $parent_lookup = $wpdb->prepare( "SELECT id FROM $issues_table WHERE path = %s", $parent_path );
        $found_parent = $wpdb->get_var( $parent_lookup );
        if ( $found_parent ) {
            $parent_id = (int) $found_parent;
        } else {
            $parent_id = null;
        }

        $row = [
            'time' => current_time('mysql'),
            'type' => 0,
            'path' => $relative_path,
            'size' => $bytes,
            'issue' => 'DUPLICATE',
            'parentId' => $parent_id,
        ];
        $wpdb->insert( $issues_table, $row );
    }
    return true;
}
/**
 * Checks a media entry by delegating to the core's check_media().
 *
 * @param mixed $media Handed over unchanged to the core
 * @return mixed Whatever the core's check_media() returns
 */
function check_media( $media ) {
    $result = $this->core->check_media( $media );
    return $result;
}
/**
 * Checks a file through the 'wpmc_check_file' filter and records an issue in the
 * scan table when the file is not reported as used.
 *
 * @param string $file Path of the file, appended to the upload path
 * @return bool True when the filter returned exactly true (file in use), false otherwise
 */
function check_file( $file ) {
    // The filter answers with true when the file is used; anything else is
    // stored as the codename of the issue.
    $verdict = apply_filters( 'wpmc_check_file', false, $file );
    if ( $verdict === true ) {
        return true;
    }

    global $wpdb;
    $absolute_path = trailingslashit( $this->core->upload_path ) . stripslashes( $file );
    $relative_path = $this->core->clean_uploaded_filename( $file );
    $scan_table = $wpdb->prefix . "mclean_scan";

    $bytes = 0;
    if ( file_exists( $absolute_path ) ) {
        $bytes = filesize( $absolute_path );
    }

    // Look for an already recorded issue on the path returned by clean_url_from_resolution()
    $parent_path = $this->core->clean_url_from_resolution( $relative_path );
    $parent_lookup = $wpdb->prepare( "SELECT id FROM $scan_table WHERE path = %s", $parent_path );
    $found_parent = $wpdb->get_var( $parent_lookup );
    if ( $found_parent ) {
        $parent_id = (int) $found_parent;
    } else {
        $parent_id = null;
    }

    $row = [
        'time' => current_time('mysql'),
        'type' => 0,
        'path' => $relative_path,
        'size' => $bytes,
        'issue' => $verdict,
        'parentId' => $parent_id,
    ];
    $wpdb->insert( $scan_table, $row );

    return false;
}
}
?>