HEX
Server: Apache/2.4.65 (Debian)
System: Linux 88f31f35b0b8 6.1.0-38-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.1.147-1 (2025-08-02) x86_64
User: www-data (33)
PHP: 8.2.29
Disabled: NONE
Upload Files
File: /var/www/html/wp-content/plugins/wpseo-news/classes/sitemap.php
<?php
/**
 * Yoast SEO: News plugin file.
 *
 * @package WPSEO_News\XML_Sitemaps
 */

use Yoast\WP\Lib\ORM;
use Yoast\WP\SEO\Models\Indexable;
use Yoast\WP\SEO\Repositories\Indexable_Repository;

/**
 * Handling the generation of the News Sitemap.
 */
class WPSEO_News_Sitemap {

	/**
	 * The date helper.
	 *
	 * @var WPSEO_Date_Helper
	 */
	protected $date;

	/**
	 * The sitemap basename.
	 *
	 * @var string
	 */
	private $basename;

	/**
	 * Constructor. Set options, basename and add actions.
	 */
	public function __construct() {
		$this->date = new WPSEO_Date_Helper();

		add_action( 'init', [ $this, 'init' ], 10 );

		add_action( 'save_post', [ $this, 'invalidate_sitemap' ] );

		add_action( 'wpseo_news_schedule_sitemap_clear', 'yoast_wpseo_news_clear_sitemap_cache' );
	}

	/**
	 * Add the XML News Sitemap to the Sitemap Index.
	 *
	 * @param string $str String with Index sitemap content.
	 *
	 * @return string
	 */
	public function add_to_index( $str ) {

		// Only add when we have items.
		$items = $this->get_items( 1 );
		if ( empty( $items ) ) {
			return $str;
		}

		$str .= '<sitemap>' . "\n";
		$str .= '<loc>' . self::get_sitemap_name() . '</loc>' . "\n";
		$str .= '<lastmod>' . htmlspecialchars( $this->date->format( get_lastpostdate( 'gmt' ) ), ENT_COMPAT, get_bloginfo( 'charset' ), false ) . '</lastmod>' . "\n";
		$str .= '</sitemap>' . "\n";

		return $str;
	}

	/**
	 * Register the XML News sitemap with the main sitemap class.
	 *
	 * @return void
	 */
	public function init() {

		$this->basename = self::get_sitemap_name( false );

		// Setting stylesheet for cached sitemap.
		add_action( 'wpseo_sitemap_stylesheet_cache_' . $this->basename, [ $this, 'set_stylesheet_cache' ] );

		if ( isset( $GLOBALS['wpseo_sitemaps'] ) ) {
			add_filter( 'wpseo_sitemap_index', [ $this, 'add_to_index' ] );

			$this->yoast_wpseo_news_schedule_clear();

			// We might consider deprecating/removing this, because we are using a static xsl file.
			$GLOBALS['wpseo_sitemaps']->register_sitemap( $this->basename, [ $this, 'build' ] );
			if ( method_exists( $GLOBALS['wpseo_sitemaps'], 'register_xsl' ) ) {
				$xsl_rewrite_rule = sprintf( '^%s-sitemap.xsl$', $this->basename );

				$GLOBALS['wpseo_sitemaps']->register_xsl(
					$this->basename,
					[
						$this,
						'build_news_sitemap_xsl',
					],
					$xsl_rewrite_rule
				);
			}
		}
	}

	/**
	 * Method to invalidate the sitemap.
	 *
	 * @param int $post_id Post ID to invalidate for.
	 *
	 * @return void
	 */
	public function invalidate_sitemap( $post_id ) {
		// If this is just a revision, don't invalidate the sitemap cache yet.
		if ( wp_is_post_revision( $post_id ) ) {
			return;
		}

		// Bail if this is a multisite installation and the site has been switched.
		if ( is_multisite() && ms_is_switched() ) {
			return;
		}

		// Only invalidate when we are in a News Post Type object.
		if ( ! in_array( get_post_type( $post_id ), WPSEO_News::get_included_post_types(), true ) ) {
			return;
		}

		WPSEO_Sitemaps_Cache::invalidate( $this->basename );
	}

	/**
	 * When sitemap is coming out of the cache there is no stylesheet. Normally it will take the default stylesheet.
	 *
	 * This method is called by a filter that will set the video stylesheet.
	 *
	 * @param object $target_object Target Object to set cache from.
	 *
	 * @return object
	 */
	public function set_stylesheet_cache( $target_object ) {
		$target_object->renderer->set_stylesheet( $this->get_stylesheet_line() );

		return $target_object;
	}

	/**
	 * Build the sitemap and push it to the XML Sitemaps Class instance for display.
	 *
	 * @return void
	 */
	public function build() {
		$GLOBALS['wpseo_sitemaps']->set_sitemap( $this->build_sitemap() );
		$GLOBALS['wpseo_sitemaps']->renderer->set_stylesheet( $this->get_stylesheet_line() );
	}

	/**
	 * Building the XML for the sitemap.
	 *
	 * @return string
	 */
	public function build_sitemap() {
		$start_time = microtime( true );

		$output = '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:news="http://www.google.com/schemas/sitemap-news/0.9">' . "\n";

		$items = $this->get_items();

		// Loop through items.
		if ( ! empty( $items ) ) {
			$output .= $this->build_items( $items );
		}

		/**
		 * Filter to add extra entries to the news sitemap.
		 *
		 * @param string $content String content to add, defaults to empty.
		 */
		$output .= apply_filters( 'wpseo_news_sitemap_content', '' );

		$output .= '</urlset>';

		$total_time = ( microtime( true ) - $start_time );
		if ( WP_DEBUG ) {
			$output .= '<!-- ' . $total_time . 's / ' . number_format( ( memory_get_peak_usage() / 1024 / 1024 ), 2 ) . 'MB -->';
		}
		return $output;
	}

	/**
	 * Outputs the XSL file.
	 *
	 * @return void
	 */
	public function build_news_sitemap_xsl() {
		$protocol = 'HTTP/1.1';

		if ( isset( $_SERVER['SERVER_PROTOCOL'] ) && is_string( $_SERVER['SERVER_PROTOCOL'] ) && $_SERVER['SERVER_PROTOCOL'] !== '' ) {
			$protocol = sanitize_text_field( wp_unslash( $_SERVER['SERVER_PROTOCOL'] ) );
		}
		// Force a 200 header and replace other status codes.
		header( $protocol . ' 200 OK', true, 200 );
		// Set the right content / mime type.
		header( 'Content-Type: text/xml' );
		// Prevent the search engines from indexing the XML Sitemap.
		header( 'X-Robots-Tag: noindex, follow', true );
		// Make the browser cache this file properly.
		header( 'Pragma: public' );
		header( 'Cache-Control: maxage=' . YEAR_IN_SECONDS );
		header( 'Expires: ' . $this->date->format_timestamp( ( time() + YEAR_IN_SECONDS ), 'D, d M Y H:i:s' ) . ' GMT' );

		/*
		 * Using `readfile()` rather than `include` to prevent issues with XSL being interpreted as PHP
		 * on systems where the PHP ini directived `short_open_tags` is turned on.
		 * phpcs:disable WordPress.WP.AlternativeFunctions.file_system_operations_readfile
		 */
		readfile( dirname( WPSEO_NEWS_FILE ) . '/assets/xml-news-sitemap.xsl' );
		// phpcs:enable

		die();
	}

	/**
	 * Clear the sitemap and sitemap index every hour to make sure the sitemap is hidden or shown when it needs to be.
	 *
	 * @return void
	 */
	private function yoast_wpseo_news_schedule_clear() {
		$schedule = wp_get_schedule( 'wpseo_news_schedule_sitemap_clear' );

		if ( empty( $schedule ) ) {
			wp_schedule_event( time(), 'hourly', 'wpseo_news_schedule_sitemap_clear' );
		}
	}

	/**
	 * Getter for stylesheet URL.
	 *
	 * @return string Stylesheet URL.
	 */
	private function get_stylesheet_line() {
		return "\n" . '<?xml-stylesheet type="text/xsl" href="' . esc_url( $this->get_xsl_url() ) . '"?>';
	}

	/**
	 * Getting all the items for the sitemap.
	 *
	 * @param int $limit The limit for the query, default is 1000 items.
	 *
	 * @return array
	 */
	private function get_items( $limit = 1000 ) {
		global $wpdb;

		// Get supported post types.
		$post_types = WPSEO_News::get_included_post_types();

		if ( empty( $post_types ) ) {
			return [];
		}

		/**
		 * The indexable repository.
		 *
		 * @var Indexable_Repository $repository
		 */
		$repository = YoastSEO()->classes->get( Indexable_Repository::class );

		$query = $repository
			->query()
			->distinct()
			->select_many( 'i.id', 'i.object_id', 'i.object_sub_type', 'i.permalink', 'i.object_published_at' )
			->select( 'breadcrumb_title', 'title' )
			->select( 'pm2.meta_value', 'stock_tickers' )
			->table_alias( 'i' )
			->left_outer_join( $wpdb->postmeta, 'pm.post_id = i.object_id AND pm.meta_key = \'_yoast_wpseo_newssitemap-robots-index\'', 'pm' )
			->left_outer_join( $wpdb->postmeta, 'pm2.post_id = i.object_id AND pm2.meta_key = \'_yoast_wpseo_newssitemap-stocktickers\'', 'pm2' )
			->where( 'i.post_status', 'publish' )
			->where( 'i.object_type', 'post' )
			->where_in( 'object_sub_type', $post_types )
			->where_raw( '( i.is_robots_noindex = 0 OR i.is_robots_noindex IS NULL )' )
			->where_raw( 'i.object_published_at >= UTC_TIMESTAMP() - INTERVAL 48 HOUR' )
			->where_raw( '( pm.meta_value = \'0\' OR pm.meta_value IS NULL )' )
			->order_by_desc( 'i.object_published_at' )
			->limit( $limit );

		$query = $this->maybe_add_terms_query( $query, $post_types );

		return $query->find_many();
	}

	/**
	 * Adds the term query to the sitemap query if required.
	 *
	 * @param ORM      $query      The sitemap query.
	 * @param string[] $post_types The post types.
	 *
	 * @return ORM The modified query.
	 */
	private function maybe_add_terms_query( ORM $query, $post_types ) {
		global $wpdb;

		$excluded_terms = (array) WPSEO_Options::get( 'news_sitemap_exclude_terms', [] );

		if ( empty( $excluded_terms ) ) {
			return $query;
		}

		$excluded_terms_by_post_type = [];
		foreach ( $excluded_terms as $excluded_term => $value ) {
			if ( $value !== 'on' ) {
				continue;
			}
			list( $term_id, $post_type ) = explode( '_for_', $excluded_term, 2 );
			if ( ! array_key_exists( $post_type, $excluded_terms_by_post_type ) ) {
				$excluded_terms_by_post_type[ $post_type ] = [];
			}
			$excluded_terms_by_post_type[ $post_type ][] = (int) $term_id;
		}

		$replacements = [];
		$term_query   = [];
		foreach ( $post_types as $post_type ) {
			if ( ! array_key_exists( $post_type, $excluded_terms_by_post_type ) ) {
				continue;
			}
			$term_ids     = $excluded_terms_by_post_type[ $post_type ];
			$replacements = array_merge( $replacements, [ $post_type ], $term_ids );
			$placeholders = implode( ', ', array_fill( 0, count( $term_ids ), '%d' ) );
			$term_query[] = "( object_sub_type = %s AND t.term_id IN ( $placeholders ) )";
		}
		$term_query = implode( ' OR ', $term_query );

		return $query
			->raw_join(
				"LEFT OUTER JOIN (
					SELECT tr.object_id, tt.term_id
					FROM $wpdb->term_relationships AS tr
					LEFT OUTER JOIN $wpdb->term_taxonomy AS tt ON tr.term_taxonomy_id = tt.term_taxonomy_id
				)",
				"( $term_query ) AND t.object_id = i.object_id",
				't',
				$replacements
			)
			->where_null( 't.object_id' );
	}

	/**
	 * Loop through all $items and build each one of it.
	 *
	 * @param Indexable[] $items Items to convert to sitemap output.
	 *
	 * @return string
	 */
	private function build_items( $items ) {
		$output = '';
		foreach ( $items as $item ) {
			$publication_tag = $this->build_publication_tag( $item );
			$output         .= new WPSEO_News_Sitemap_Item( $item, $publication_tag );
		}

		return $output;
	}

	/**
	 * Builds the publication tag.
	 *
	 * @param Indexable $item The post.
	 *
	 * @return string
	 */
	private function build_publication_tag( $item ) {
		$publication_name = WPSEO_Options::get( 'news_sitemap_name', get_bloginfo( 'name' ) );
		/**
		 * Filter: 'Yoast\WP\News\publication_language' - Allow overriding the publication language for a specific Indexable.
		 *
		 * @param string    $publication_lang The publication language.
		 * @param Indexable $item             The post.
		 */
		$publication_lang = (string) apply_filters( 'Yoast\WP\News\publication_language', $this->get_publication_lang(), $item );
		$publication_lang = sanitize_text_field( $publication_lang );
		$charset          = get_bloginfo( 'charset' );

		$publication_tag  = "\t\t<news:publication>\n";
		$publication_tag .= "\t\t\t<news:name>" . htmlspecialchars( $publication_name, ENT_COMPAT, $charset, false ) . '</news:name>' . "\n";
		$publication_tag .= "\t\t\t<news:language>" . htmlspecialchars( $publication_lang, ENT_COMPAT, $charset, false ) . '</news:language>' . "\n";
		$publication_tag .= "\t\t</news:publication>\n";

		return $publication_tag;
	}

	/**
	 * Getting the name for the sitemap, if $full_path is true, it will return the full path.
	 *
	 * @param bool $full_path Generate a full path.
	 *
	 * @return string
	 */
	public static function get_sitemap_name( $full_path = true ) {
		/**
		 * Allows for filtering the News sitemap name.
		 *
		 * @param string $sitemap_name First portion of the news sitemap "file" name.
		 *
		 * @since 12.5.0
		 */
		$sitemap_name = apply_filters( 'Yoast\WP\News\sitemap_name', self::news_sitemap_basename() );

		// When $full_path is true, it will generate a full path.
		if ( $full_path ) {
			return WPSEO_Sitemaps_Router::get_base_url( $sitemap_name . '-sitemap.xml' );
		}

		return $sitemap_name;
	}

	/**
	 * Returns the basename of the news-sitemap, the first portion of the name of the sitemap "file".
	 *
	 * Defaults to news, but it's possible to override it by using the YOAST_NEWS_SITEMAP_BASENAME constant.
	 *
	 * @since 3.1
	 *
	 * @return string Basename for the news sitemap.
	 */
	public static function news_sitemap_basename() {
		$basename = 'news';

		if ( post_type_exists( 'news' ) ) {
			$basename = 'yoast-news';
		}

		if ( defined( 'YOAST_NEWS_SITEMAP_BASENAME' ) ) {
			$basename = YOAST_NEWS_SITEMAP_BASENAME;
		}

		return $basename;
	}

	/**
	 * Retrieves the XSL URL that should be used in the current environment.
	 *
	 * When home_url and site_url are not the same, the home_url should be used.
	 * This is because the XSL needs to be served from the same domain, protocol and port
	 * as the XML file that is loading it.
	 *
	 * @return string The XSL URL that needs to be used.
	 */
	protected function get_xsl_url() {
		if ( home_url() !== site_url() ) {
			return home_url( $this->basename . '-sitemap.xsl' );
		}

		return plugin_dir_url( WPSEO_NEWS_FILE ) . 'assets/xml-news-sitemap.xsl';
	}

	/**
	 * Getting the publication language.
	 *
	 * @return string Publication language.
	 */
	private function get_publication_lang() {
		// phpcs:ignore WordPress.NamingConventions.PrefixAllGlobals.NonPrefixedHooknameFound -- WPSEO hook.
		$locale = apply_filters( 'wpseo_locale', get_locale() );

		// Fallback to 'en', if the length of the locale is less than 2 characters.
		if ( strlen( $locale ) < 2 ) {
			$locale = 'en';
		}

		return substr( $locale, 0, 2 );
	}
}