HEX
Server: Apache/2.4.65 (Debian)
System: Linux 88f31f35b0b8 6.1.0-38-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.1.147-1 (2025-08-02) x86_64
User: www-data (33)
PHP: 8.2.29
Disabled: NONE
Upload Files
File: /var/www/html/wp-content/plugins/tiny-compress-images/src/class-tiny-source-base.php
<?php

/*
* Tiny Compress Images - WordPress plugin.
* Copyright (C) 2015-2018 Tinify B.V.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/

abstract class Tiny_Source_Base {

	public $raw_html;
	protected $base_dir;
	protected $allowed_domains;
	protected $valid_mimetypes;

	public function __construct( $html, $base_dir, $domains ) {
		$this->raw_html        = $html;
		$this->base_dir        = $base_dir;
		$this->allowed_domains = $domains;
		$this->valid_mimetypes = array( 'image/avif', 'image/webp' );
	}

	protected static function get_attribute_value( $element, $name ) {
		// Match the exact attribute name (not part of data-media, mediaType, etc.)
		// and capture a single- or double-quoted value.
		$delim = '~';
		$attr  = preg_quote( $name, $delim );
		$regex = $delim . '(?<![\w:-])' . $attr . '\s*=\s*(["\'])(.*?)\1' . $delim . 'is';

		if ( preg_match( $regex, $element, $m ) ) {
			return $m[2];
		}
		return null;
	}

	/**
	 * Extract elements by tag name from an HTML string (regex-based).
	 *
	 * @param string $html     The HTML string to search in.
	 * @param string $tagname  The tag name (e.g., 'div', 'source', 'img').
	 * @return array           Array of matched elements as strings.
	 */
	protected function get_element_by_tag( $html, $tagname ) {
		$results = array();

		// Self-closing / void tag (e.g. <source />, <img />, <br />)
		if ( preg_match_all(
			'~<' . preg_quote( $tagname, '~' ) . '\b(?:[^>"\']+|"[^"]*"|\'[^\']*\')*/?>~i',
			$html,
			$matches
		) ) {
			$results = array_merge( $results, $matches[0] );
		}

		// Normal paired tags (e.g. <div>…</div>)
		$regex_tag = preg_quote( $tagname, '~' );
		if ( preg_match_all(
			'~<' . $regex_tag .
				'\b(?:[^>"\']+|"[^"]*"|\'[^\']*\')*>.*?</' .
				$regex_tag .
				'>~is',
			$html,
			$matches
		) ) {
			$results = array_merge( $results, $matches[0] );
		}

		return $results;
	}

	protected function get_local_path( $url ) {
		if ( strpos( $url, 'http' ) === 0 ) {
			$matched_domain = null;

			foreach ( $this->allowed_domains as $domain ) {
				if ( strpos( $url, $domain ) === 0 ) {
					$matched_domain = $domain;
					break;
				}
			}

			if ( null === $matched_domain ) {
				return '';
			}

			$url = substr( $url, strlen( $matched_domain ) );
		}
		$url = $this->base_dir . $url;

		return $url;
	}

	protected function get_formatted_source( $image_source_data, $mimetype ) {
		$format_url = Tiny_Helpers::replace_file_extension( $mimetype, $image_source_data['path'] );
		$local_path = $this->get_local_path( $format_url );
		if ( empty( $local_path ) ) {
			return null;
		}

		$exists_local = file_exists( $local_path );
		if ( $exists_local ) {
			return array(
				'src'  => $format_url,
				'size' => $image_source_data['size'],
				'type' => $mimetype,
			);
		}
		return null;
	}

	/**
	 * Retrieves the sources from the <img> or <source> element
	 *
	 * @return array{path: string, size: string}[] The image sources
	 */
	protected function get_image_srcsets( $html ) {
		$result = array();
		$srcset = $this::get_attribute_value( $html, 'srcset' );

		if ( $srcset ) {
			// Split the srcset to get individual entries
			$srcset_entries = explode( ',', $srcset );

			foreach ( $srcset_entries as $entry ) {
				// Trim whitespace
				$entry = trim( $entry );

				// Split by whitespace to separate path and size/density descriptor
				$parts = preg_split( '/\s+/', $entry, 2 );

				if ( count( $parts ) === 2 ) {
					// We have both path and size
					$result[] = array(
						'path' => $parts[0],
						'size' => $parts[1],
					);
				} elseif ( count( $parts ) === 1 ) {
					// We only have a path, will be interpreted as pixel
					// density 1x (unusual in srcset)
					$result[] = array(
						'path' => $parts[0],
						'size' => '',
					);
				}
			}
		}
		return $result;
	}

	/**
	 * Retrieves the sources from the <img> or <source> element
	 *
	 * @return array{path: string, size: string}[] The image sources
	 */
	private function get_image_src( $html ) {
		$source = $this::get_attribute_value( $html, 'src' );
		if ( ! empty( $source ) ) {
			// No srcset, but we have a src attribute
			return array(
				'path' => $source,
				'size' => '',
			);
		}
		return array();
	}


	/**
	 * Creates one or more <source> elements if alternative formats
	 * are available.
	 *
	 * @param string $original_source_html, either <source> or <img>
	 * @return array{string} array of <source> html
	 */
	protected function create_alternative_sources( $original_source_html ) {
		$srcsets = $this->get_image_srcsets( $original_source_html );
		if ( empty( $srcsets ) ) {
			// no srcset, try src attribute
			$src = $this->get_image_src( $original_source_html );
			if ( ! empty( $src ) ) {
				$srcsets[] = $src;
			}
		}

		if ( empty( $srcsets ) ) {
			return array();
		}

		$is_source_tag = (bool) preg_match( '#<source\b#i', $original_source_html );

		$sources          = array();
		$width_descriptor = $this->get_largest_width_descriptor( $srcsets );

		foreach ( $this->valid_mimetypes as $mimetype ) {
			$srcset_parts = array();

			foreach ( $srcsets as $srcset ) {
				$alt_source = $this->get_formatted_source( $srcset, $mimetype );
				if ( $alt_source ) {
					$srcset_parts[] = trim( $alt_source['src'] . ' ' . $alt_source['size'] );
				}
			}

			if (
				$width_descriptor &&
				! self::srcset_contains_width_descriptor(
					$srcset_parts,
					$width_descriptor
				)
			) {
				continue;
			}

			if ( empty( $srcset_parts ) ) {
				continue;
			}

			$source_attr_parts = array();

			$srcset_attr                 = implode( ', ', $srcset_parts );
			$source_attr_parts['srcset'] = $srcset_attr;

			if ( $is_source_tag ) {
				foreach ( array( 'sizes', 'media', 'width', 'height' ) as $attr ) {
					$attr_value = $this->get_attribute_value( $original_source_html, $attr );
					if ( $attr_value ) {
						$source_attr_parts[ $attr ] = $attr_value;
					}
				}
			} else {
				$sizes_value = $this->get_attribute_value( $original_source_html, 'sizes' );
				if ( $sizes_value ) {
					$source_attr_parts['sizes'] = $sizes_value;
				}
			}

			$source_attr_parts['type'] = $mimetype;
			$source_parts              = array( '<source' );
			foreach ( $source_attr_parts as $source_attr_name => $source_attr_val ) {
				$source_parts[] = $source_attr_name . '="' . esc_attr( $source_attr_val ) . '"';
			}
			$source_parts[] = '/>';
			$sources[]      = implode( ' ', $source_parts );
		} // End foreach().

		return $sources;
	}

	/**
	 * Returns the largest numeric width descriptor
	 * (e.g. 2000 from "2000w") found in the srcset data.
	 *
	 * @param array<array{path: string, size: string}> $srcsets
	 * @return int
	 */
	public static function get_largest_width_descriptor( $srcsets ) {
		$largest = 0;

		foreach ( $srcsets as $srcset ) {
			if ( empty( $srcset['size'] ) ) {
				continue;
			}

			if ( preg_match( '/(\d+)w/', $srcset['size'], $matches ) ) {
				$width = (int) $matches[1];
				if ( $width > $largest ) {
					$largest = $width;
				}
			}
		}

		return $largest;
	}

	/**
	 * Determines whether a srcset list contains the provided width descriptor.
	 *
	 * @param string[] $srcset_parts
	 * @param int      $width_descriptor
	 * @return bool    true if width is in srcset
	 */
	public static function srcset_contains_width_descriptor( $srcset_parts, $width_descriptor ) {
		if ( empty( $srcset_parts ) || $width_descriptor <= 0 ) {
			return false;
		}

		$suffix        = ' ' . $width_descriptor . 'w';
		$suffix_length = strlen( $suffix );

		foreach ( $srcset_parts as $srcset_part ) {
			if ( substr( $srcset_part, -$suffix_length ) === $suffix ) {
				return true;
			}
		}

		return false;
	}
}