HEX
Server: Apache/2.4.65 (Debian)
System: Linux 88f31f35b0b8 6.1.0-38-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.1.147-1 (2025-08-02) x86_64
User: www-data (33)
PHP: 8.2.29
Disabled: NONE
Upload Files
File: /var/www/html/wp-content/plugins/wp-rss-aggregator/core/src/Importer.php
<?php

namespace RebelCode\Aggregator\Core;

use Throwable;
use SimpleXMLElement;
use RebelCode\Aggregator\Core\Utils\Time;
use RebelCode\Aggregator\Core\Utils\Result;
use RebelCode\Aggregator\Core\Utils\Arrays;
use RebelCode\Aggregator\Core\Store\WpPostsStore;
use RebelCode\Aggregator\Core\Store\SourcesStore;
use RebelCode\Aggregator\Core\Store\RejectListStore;
use RebelCode\Aggregator\Core\Store\ProgressStore;
use RebelCode\Aggregator\Core\Source\ReconcileStrategy;
use RebelCode\Aggregator\Core\Source\ImportOrder;
use RebelCode\Aggregator\Core\RssReader\RssItem;
use RebelCode\Aggregator\Core\Importer\WpPostBuilder;
use RebelCode\Aggregator\Core\Importer\IrPostBuilder;
use DateTime;

class Importer {

	/** Reader used to discover feeds for a URI and to read a feed's items. */
	public RssReader $rssReader;
	/** Store used to look up sources and to record their last error and last update time. */
	public SourcesStore $sources;
	/** Store used to count, look up (by title or GUID) and delete imported posts. */
	public WpPostsStore $wpPosts;
	/** Store that is queried to check whether an item's ID or permalink is on the reject list. */
	public RejectListStore $rejectList;
	/** Builder that turns an RSS item and its source into an IR post. */
	public IrPostBuilder $postBuilder;
	/** Store that receives progress messages, totals and advancement for an optional progress ID. */
	public ProgressStore $progress;

	/**
	 * Creates the importer from its collaborators.
	 *
	 * Each argument is stored as-is on the public property of the same name.
	 *
	 * @param RssReader       $rssReader   Reader used to find and read RSS feeds.
	 * @param SourcesStore    $sources     Store of feed sources.
	 * @param WpPostsStore    $wpPosts     Store of imported posts.
	 * @param RejectListStore $rejectList  Store of rejected item IDs and URLs.
	 * @param IrPostBuilder   $postBuilder Builder that converts RSS items into IR posts.
	 * @param ProgressStore   $progress    Store used for progress reporting.
	 */
	public function __construct(
		RssReader $rssReader,
		SourcesStore $sources,
		WpPostsStore $wpPosts,
		RejectListStore $rejectList,
		IrPostBuilder $postBuilder,
		ProgressStore $progress
	) {
		$this->progress    = $progress;
		$this->postBuilder = $postBuilder;
		$this->rejectList  = $rejectList;
		$this->wpPosts     = $wpPosts;
		$this->sources     = $sources;
		$this->rssReader   = $rssReader;
	}

	/**
	 * Discovers the RSS feeds that are available at a URI.
	 *
	 * This is a pass-through to the RSS reader's feed discovery; the reader's
	 * result is returned unchanged.
	 *
	 * @param string $uri The URI to search for RSS feeds.
	 * @return Result<RssFeedInfo[]>
	 */
	public function findRssFeeds( string $uri ): Result {
		$discovered = $this->rssReader->findFeeds( $uri );

		return $discovered;
	}

	/**
	 * Reads items from an RSS feed.
	 *
	 * The progress message for `$pid` is updated before the feed is read and
	 * again once the number of returned items is known.
	 *
	 * @param string      $url The URL of the RSS feed. Surrounding whitespace is trimmed.
	 * @param int|null    $num The maximum number of items to fetch. When null, no pagination is
	 *                         applied: the offset is 0 and null is passed to the feed as the limit.
	 * @param int         $page The 1-based page number to fetch. Values below 1 are treated as page 1.
	 * @param string|null $pid Optional ID of the progress to update.
	 * @return Result<array{0:RssItem[],1:int}> An array of RSS items and the total number of items in the feed,
	 *                                           or an error if the URL is empty or the feed could not be read.
	 */
	public function read( string $url, ?int $num = null, int $page = 1, ?string $pid = null ): Result {
		$url = trim( $url );

		if ( $url === '' ) {
			return Result::Err( 'RSS feed URL is empty' );
		}

		$this->progress->setMessage( $pid, __( 'Reading RSS feed', 'wprss' ) );

		Logger::info( "Reading RSS feed at {$url}" );

		$result = $this->rssReader->read( $url, true );

		if ( ! $result->isOk() ) {
			return Result::Err( $result->error() );
		}

		$feed = $result->get();
		$total = $feed->getNumItems();

		// Offset of the first item on the requested page. The page index is clamped
		// so that a page number below 1 can never produce a negative offset.
		$start = ( $num === null ) ? 0 : max( 0, $page - 1 ) * $num;

		$items = Arrays::fromIterable( $feed->getItems( $start, $num ) );
		$numFound = count( $items );

		$this->progress->setMessage(
			$pid,
			sprintf(
				_n( 'Found %d item in the RSS feed', 'Found %d items in the RSS feed', $numFound, 'wprss' ),
				$numFound
			)
		);

		Logger::info( "Found {$numFound} items in the feed" );

		return Result::Ok( array( $items, $total ) );
	}

	/**
	 * Checks if an RSS feed is valid using the W3C feed validator's SOAP output.
	 *
	 * The URL is trimmed and URL-decoded, then sent (re-encoded) to the validator.
	 * The feed is reported as valid only when the response contains a validity
	 * element whose text is "true" (case-insensitive).
	 *
	 * NOTE(review): the validator is contacted over plain HTTP - confirm whether
	 * HTTPS should be used instead.
	 *
	 * @param string $url The URL of the RSS feed to validate.
	 * @return Result<bool> Whether the feed is valid, or an error if the request
	 *                      failed or the response could not be processed.
	 */
	public function validate( string $url ): Result {
		$feedUrl = urldecode( trim( $url ) );
		$endpoint = 'http://validator.w3.org/feed/check.cgi?output=soap12&url=' . urlencode( $feedUrl );

		$userAgent = sprintf( 'WordPress/%s; %s', get_bloginfo( 'version' ), get_bloginfo( 'url' ) );

		$requestArgs = array(
			'timeout' => 10,
			'httpversion' => '1.1',
			'user-agent' => $userAgent,
		);

		$response = wp_remote_get( $endpoint, $requestArgs );

		if ( is_wp_error( $response ) ) {
			return Result::Err( $response->get_error_message() );
		}

		$soapBody = wp_remote_retrieve_body( $response );

		try {
			$envelope = new SimpleXMLElement( $soapBody );
			$envelope->registerXPathNamespace( 'env', 'http://www.w3.org/2003/05/soap-envelope' );
			$envelope->registerXPathNamespace( 'm', 'http://www.w3.org/2005/10/feed-validator' );
			$matches = $envelope->xpath( '//env:Envelope/env:Body/m:feedvalidationresponse/m:validity' );

			// Assume invalid unless the response explicitly says otherwise.
			$isValid = false;

			if ( $matches !== null && count( $matches ) !== 0 ) {
				$isValid = strtolower( (string) $matches[0] ) === 'true';
			}

			return Result::Ok( $isValid );
		} catch ( Throwable $failure ) {
			return Result::Err( $failure );
		}
	}

	/**
	 * Reads a source's feed and converts the items into IR posts.
	 *
	 * The feed is read with {@link read()} and the resulting items are handed to
	 * {@link convert()}. Because {@link convert()} is a generator, the conversion
	 * itself only runs when the returned iterable is consumed.
	 *
	 * @param Source      $src The source.
	 * @param int|null    $num The maximum number of items to fetch.
	 * @param int         $page The page number to fetch.
	 * @param bool        $all Passed through to {@link convert()}; if true, all items are fetched, even existing ones.
	 * @param string|null $pid Optional ID of the progress to update.
	 * @return Result<iterable<IrPost>> The fetched items, or the read error.
	 */
	public function fetch( Source $src, ?int $num = null, int $page = 1, bool $all = false, ?string $pid = null ): Result {
		$readResult = $this->read( $src->url, $num, $page, $pid );

		if ( ! $readResult->isErr() ) {
			// Only the items are needed here; the feed total is ignored.
			[$rssItems] = $readResult->get();

			return Result::Ok( $this->convert( $src, $rssItems, $all, $pid ) );
		}

		return Result::Err( $readResult->error() );
	}

	/**
	 * Generates a preview of the items that would be imported for a source.
	 *
	 * The feed is read, every item is converted (existing items included), and the
	 * converted posts are passed through the `wpra.importer.preview` filter. The
	 * `wpautop` callback is removed from `the_content` before the posts are
	 * collected into an array.
	 *
	 * @param Source      $src The source.
	 * @param int|null    $num Optional number of items to preview.
	 * @param int         $page The page number to preview.
	 * @param string|null $pid Optional ID of the progress to update.
	 * @return Result<array{posts:IrPost[],total:int}> The previewed posts and the total number of items in the feed.
	 */
	public function preview( Source $src, ?int $num = null, int $page = 1, ?string $pid = null ): Result {
		$this->progress->touch( $pid, 1 );

		$readResult = $this->read( $src->url, $num, $page, $pid );

		if ( $readResult->isErr() ) {
			return Result::Err( $readResult->error() );
		}

		[$rssItems, $feedTotal] = $readResult->get();

		$converted = $this->convert( $src, $rssItems, true, $pid, $num, $page );
		$filtered = apply_filters( 'wpra.importer.preview', $converted, $src );

		// Keep this ahead of the collection below: convert() is lazy, so the posts
		// are only built once the iterable is turned into an array.
		remove_filter( 'the_content', 'wpautop' );

		$preview = array(
			'posts' => Arrays::fromIterable( $filtered ),
			'total' => $feedTotal,
		);

		return Result::Ok( $preview );
	}

	/**
	 * Imports items using {@link fetch()} and {@link store()}, then trims the
	 * source's imported posts with {@link truncate()}.
	 *
	 * While the import runs, the logger context is set to the source ID and a
	 * unique import ID. The context is always cleared when this method exits,
	 * including on error returns and when an exception is thrown.
	 *
	 * For sources that have an ID, the source's last error is updated with the
	 * fetch error message, or cleared (null) when the fetch succeeded. Truncation
	 * is skipped for sources without an ID, and a truncation failure is only
	 * logged as a warning.
	 *
	 * @param Source      $src The source.
	 * @param int|null    $num The maximum number of items to import.
	 * @param int         $page The page number to import.
	 * @param string|null $pid Optional ID of the progress to update.
	 * @return Result<IrPost[]> A result that contains the list of stored IR posts.
	 */
	public function import( Source $src, ?int $num = null, int $page = 1, ?string $pid = null ): Result {
		if ( trim( $src->url ) === '' ) {
			return Result::Err( __( 'The source does not have a URL.', 'wprss' ) );
		}

		Logger::setContext(
			array(
				'source' => $src->id,
				'import' => uniqid(),
			)
		);

		try {
			$this->progress->touch( $pid, 1 );

			$res = $this->fetch( $src, $num, $page, false, $pid );

			if ( $src->id ) {
				$newError = $res->isErr() ? $res->error()->getMessage() : null;
				$this->sources->updateLastError( array( $src->id ), $newError );
			}

			if ( $res->isErr() ) {
				return $res;
			}

			$items = $res->get();

			$this->progress->setMessage( $pid, __( 'Saving items', 'wprss' ) );

			$res = $this->store( $src, $items, $pid );

			if ( $res->isErr() ) {
				return Result::Err( $res->error() );
			}

			$posts = $res->get();

			if ( count( $posts ) === 0 ) {
				Logger::info( 'Source is already up to date' );
			}

			// truncate() requires an integer source ID, so it is only attempted for
			// sources that have one.
			if ( $src->id ) {
				$res = $this->truncate( $src->id );
				if ( $res->isErr() ) {
					Logger::warning( $res->error() );
				}
			}

			return Result::Ok( $posts );
		} finally {
			// Runs on every exit path so that the source/import context never leaks
			// into log entries written after this import.
			Logger::clearContext();
		}
	}

	/**
	 * Same as {@link import()} but looks the source up by its ID first.
	 *
	 * If the source cannot be retrieved, the lookup's error result is returned
	 * as-is and no import is attempted.
	 *
	 * @param int         $srcId The ID of the source.
	 * @param int|null    $num The maximum number of items to import.
	 * @param int         $page The page number to import.
	 * @param string|null $pid Optional ID of the progress to update.
	 * @return Result<IrPost[]> A result that contains the list of stored IR posts.
	 */
	public function importById( int $srcId, ?int $num = null, int $page = 1, ?string $pid = null ): Result {
		$lookup = $this->sources->getById( $srcId );

		return $lookup->isErr()
			? $lookup
			: $this->import( $lookup->get(), $num, $page, $pid );
	}

	/**
	 * Imports items for all sources that are currently pending an update.
	 *
	 * Sources are imported one after another with the default arguments of
	 * {@link import()}. The first failing import stops the run and its error
	 * result is returned; the remaining pending sources are not processed.
	 *
	 * @return Result<int> The number of updated sources.
	 */
	public function importPending(): Result {
		$pending = $this->sources->getPendingUpdate();

		if ( ! $pending->isOk() ) {
			return Result::Err( $pending->error() );
		}

		$updated = 0;

		foreach ( $pending->get() as $source ) {
			// Restart the execution time limit for every source.
			set_time_limit( 600 );

			$outcome = $this->import( $source );

			if ( $outcome->isErr() ) {
				return $outcome;
			}

			++$updated;
		}

		return Result::Ok( $updated );
	}

	/**
	 * Converts RSS items into IR posts.
	 *
	 * The items are first de-duplicated by title, topped up if de-duplication
	 * removed some, sorted, filtered against the reject list and trimmed to the
	 * source's limits. Each remaining item is then built into an IR post and
	 * passed through the `wpra.importer.post.final` filter. The post returned by
	 * that filter is the one that is yielded; a filter can drop an item by
	 * returning null.
	 *
	 * This method is a generator: nothing runs until the result is iterated.
	 *
	 * @param Source            $source The source that fetched the items.
	 * @param iterable<RssItem> $items The fetched RSS items.
	 * @param bool              $all If true, existing items will not be reconciled.
	 * @param string|null       $pid Optional ID of the progress to update.
	 * @param int|null          $num The maximum number of items to fetch.
	 * @param int|null          $page The page number to fetch.
	 * @return iterable<IrPost> A list of IR posts.
	 */
	public function convert( Source $source, iterable $items, bool $all = false, ?string $pid = null, ?int $num = null, ?int $page = 1 ): iterable {
		$items = Arrays::fromIterable( $items );
		$items_count_before_dup = count( $items );
		$items = $this->filterDuplicateItems( $source, $items, $all );
		$items = $this->maybeLoadMoreItems( $source, $items, $items_count_before_dup, $all, $pid, $num, $page );
		$items = $this->sortItems( $source, $items );
		$items = $this->rejectListFilter( $source, $items );
		$items = $this->limitItems( $source, $items );

		$count = count( $items );

		$this->progress->setTotal( $pid, $count );

		// When all items are requested, existing posts are not looked up, so every
		// item is treated as new.
		$existingMap = $all ? array() : $this->getExistingItemsMap( $items );

		foreach ( $items as $i => $item ) {
			$this->progress->advance(
				$pid,
				1,
				sprintf( _x( '%1$d/%2$d', 'Progress bar', 'wprss' ), $i + 1, $count )
			);

			$id = $item->getId();

			if ( $id === null ) {
				Logger::warning( __( 'Ignoring RSS item with no GUID or permalink.', 'wprss' ) );
				continue;
			}

			$existingPost = $existingMap[ $id ] ?? null;

			// The source's reconcile strategy only applies to items that already
			// exist. Items without an existing post are always built.
			$strat = ( null === $existingPost )
				? ReconcileStrategy::OVERWRITE
				: $source->settings->reconcileStrategy;

			switch ( $strat ) {
				case ReconcileStrategy::PRESERVE:
					Logger::debug( "Item already exists: {$id}" );
					break;

				case ReconcileStrategy::OVERWRITE:
					Logger::debug( "Converting item: {$id}" );
					$newPost = $this->postBuilder->build( $item, $source );

					if ( $existingPost !== null ) {
						// Keep the identity of the existing post and merge its sources
						// with the ones of the freshly built post.
						$newPost->sources = array_merge( $existingPost->sources, $newPost->sources );
						$newPost->sources = array_unique( $newPost->sources );
						$newPost->postId = $existingPost->postId;
						$newPost->id = $existingPost->id;
					}

					/** @var IrPost|null $finalPost */
					$finalPost = apply_filters( 'wpra.importer.post.final', $newPost, $item, $source );

					if ( $finalPost !== null ) {
						// Yield the filtered post so that a replacement returned by a
						// filter callback is not discarded.
						yield $finalPost;
					}
					break;

				default:
					Logger::warning( "Unknown reconciliation strategy: \"{$strat}\". Ignoring item." );
					break;
			}
		}
	}

	/**
	 * Saves fetched IR posts to the store.
	 *
	 * For sources that have an ID, the source's last update time is refreshed
	 * before any post is processed. Every post is passed through the
	 * `wpra.importer.post.store` filter (a null result skips the post) and then
	 * created with {@link createWpPost()}. Posts that fail to be created are
	 * logged as errors and skipped, so the returned result is always successful.
	 *
	 * @param Source           $source The feed source.
	 * @param iterable<IrPost> $irPosts The list of IR posts to store.
	 * @param string|null      $pid Optional progress ID; it is passed to the store filter.
	 * @return Result<list<IrPost>> The list of stored IR posts.
	 */
	public function store( Source $source, iterable $irPosts, ?string $pid = null ): Result {
		if ( $source->id ) {
			$this->sources->updateLastUpdateTime( array( $source->id ) );
		}

		$stored = array();

		foreach ( $irPosts as $candidate ) {
			$candidate = apply_filters( 'wpra.importer.post.store', $candidate, $source, $pid );

			if ( $candidate === null ) {
				continue;
			}

			$created = $this->createWpPost( $candidate );

			if ( ! $created->isOk() ) {
				Logger::error( $created->error() );
				continue;
			}

			$wpPost = $created->get();
			$stored[] = $wpPost;

			Logger::info( sprintf( __( 'Imported post (#%1$d) "%2$s"', 'wprss' ), $wpPost->postId ?? 0, $wpPost->title ) );
		}

		return Result::Ok( $stored );
	}

	/**
	 * Deletes imported posts of a source that exceed the source's limits.
	 *
	 * Two limits from the source's settings are enforced, in this order:
	 * the import limit (posts in excess of the limit are deleted) and the age
	 * limit (posts older than the configured age are deleted).
	 *
	 * NOTE(review): the excess posts are deleted with an 'ASC' ordering argument,
	 * presumably oldest first - confirm against WpPostsStore::deleteFromSources().
	 *
	 * @param int $srcId The ID of the source.
	 * @return Result<int> The total number of deleted posts, or the first error
	 *                     returned by the source lookup or a delete operation.
	 */
	public function truncate( int $srcId ): Result {
		$lookup = $this->sources->getById( $srcId );
		if ( $lookup->isErr() ) {
			return $lookup;
		}

		$settings = $lookup->get()->settings;

		$maxItems = $settings->importLimit;
		$maxAge = $settings->ageLimit;
		$maxAgeUnit = $settings->ageLimitUnit;

		$deleted = 0;

		if ( $maxItems > 0 ) {
			$imported = $this->wpPosts->getCount( array( $srcId ) )->getOr( 0 );
			$excess = max( 0, $imported - $maxItems );

			if ( absint( $excess ) > 0 ) {
				Logger::info( sprintf( 'Truncating %d old items', $excess ) );

				$removal = $this->wpPosts->deleteFromSources( array( $srcId ), false, $excess, 1, 'ASC' );
				if ( $removal->isErr() ) {
					return $removal;
				}

				$deleted += $removal->get();
				Logger::debug( sprintf( 'Deleted %d items', $deleted ) );
			}
		}

		$cutoffTs = strtotime( "-{$maxAge} {$maxAgeUnit}" );

		// No age limit configured, or the age could not be parsed: nothing more to do.
		if ( ! ( $maxAge > 0 ) || false === $cutoffTs ) {
			return Result::Ok( $deleted );
		}

		$cutoff = new DateTime();
		$cutoff->setTimestamp( $cutoffTs );

		Logger::info( sprintf( 'Truncating items older than %s', Time::toHumanFormat( $cutoff ) ) );

		$removal = $this->wpPosts->deleteOlderThan( $cutoff, $srcId );
		if ( $removal->isErr() ) {
			return $removal;
		}

		$deleted += $removal->get();

		return Result::Ok( $deleted );
	}

	/**
	 * Creates a WordPress post from an IR Post.
	 *
	 * Fires `wpra.importer.post.beforeCreate` with the given post, then builds
	 * the WordPress post. On success, `wpra.importer.post.created` is fired with
	 * the built post. `wpra.importer.post.afterCreate` is always fired last: with
	 * the built post on success, or with the given post on failure.
	 *
	 * @param IrPost $post The IR post to create a WordPress post from.
	 * @return Result<IrPost> The result of building the WordPress post.
	 */
	public function createWpPost( IrPost $post ): Result {
		do_action( 'wpra.importer.post.beforeCreate', $post );

		$built = WpPostBuilder::buildWpPost( $post );

		$subject = $post;

		if ( $built->isOk() ) {
			$subject = $built->get();
			do_action( 'wpra.importer.post.created', $subject );
		}

		do_action( 'wpra.importer.post.afterCreate', $subject );

		return $built;
	}

	/**
	 * Sorts the items by date according to the source's import order.
	 *
	 * An item's date is its creation date, falling back to its modification
	 * date. Items are sorted ascending when the import order is
	 * {@link ImportOrder::ASC} and descending for any other value.
	 *
	 * @param  Source        $source The feed source.
	 * @param  list<RssItem> $items  The items to sort.
	 * @return list<RssItem> The sorted items.
	 */
	protected function sortItems( Source $source, array $items ): array {
		Logger::debug( "Sorting items in {$source->settings->importOrder} order" );

		$direction = -1;
		if ( $source->settings->importOrder === ImportOrder::ASC ) {
			$direction = 1;
		}

		usort(
			$items,
			fn ( RssItem $left, RssItem $right ) => (
				( $left->getDateCreated() ?? $left->getDateModified() )
				<=>
				( $right->getDateCreated() ?? $right->getDateModified() )
			) * $direction
		);

		return $items;
	}

	/**
	 * Removes items that are on the reject list.
	 *
	 * An item is identified by its ID, falling back to its permalink; items with
	 * neither are dropped with a warning. If the reject list lookup fails, a
	 * warning is logged and the item is treated as not rejected. Items that pass
	 * are handed to the `wpra.importer.item.filter` filter, which may drop an
	 * item by returning null.
	 *
	 * @param Source        $source The feed source.
	 * @param list<RssItem> $items The items to filter.
	 * @return list<RssItem> The filtered items.
	 */
	protected function rejectListFilter( Source $source, array $items ): array {
		Logger::debug( 'Filtering reject list items' );

		$accepted = array();

		foreach ( $items as $item ) {
			$permalink = $item->getPermalink();
			$itemId = $item->getId() ?? $permalink;

			if ( empty( $itemId ) ) {
				Logger::warning( 'Item has no ID or permalink', array( 'source' => $source ) );
				continue;
			}

			// Look up both identifiers, leaving out a missing permalink.
			$lookup = $this->rejectList->contains( array_filter( array( $itemId, $permalink ) ) );

			if ( ! $lookup->isOk() ) {
				Logger::warning( $lookup->error(), array( 'source' => $source ) );
			} elseif ( $lookup->get() ) {
				Logger::debug( "Item {$itemId} is rejected" );
				continue;
			}

			$kept = apply_filters( 'wpra.importer.item.filter', $item, $source );

			if ( null !== $kept ) {
				$accepted[] = $kept;
			}
		}

		return $accepted;
	}

	/**
	 * Filters out RSS items with duplicate titles, if the source requires unique titles.
	 *
	 * When the source's `uniqueTitles` setting is off, the items are returned
	 * untouched. Otherwise, items without a title are dropped, only the first
	 * item of every title within the batch is kept, and - unless filtering for a
	 * preview - items whose title already exists are dropped as well. Whether a
	 * title exists can be overridden with the `wpra.importer.titleExists` filter.
	 *
	 * @param Source        $source The feed source.
	 * @param list<RssItem> $items The items to filter.
	 * @param bool          $preview Whether the items are being filtered for a preview.
	 * @return list<RssItem> The filtered items.
	 */
	protected function filterDuplicateItems( Source $source, array $items, bool $preview = false ): array {
		if ( ! $source->settings->uniqueTitles ) {
			return $items;
		}

		Logger::debug( 'Filtering items' );

		$fItems = array();
		$titles = array();

		foreach ( $items as $item ) {
			$title = $item->getTitle();

			if ( null === $title ) {
				continue;
			}

			// Strict comparison: a loose comparison treats different numeric-looking
			// titles (e.g. "1e3" and "1000") as equal and would drop one of them.
			if ( in_array( $title, $titles, true ) ) {
				continue;
			}

			$titles[] = $title;

			$titleExists = $this->wpPosts->titleExists( $title )->getOr( false );
			$titleExists = apply_filters( 'wpra.importer.titleExists', $titleExists, $title, $source );

			if ( ! $preview && $titleExists ) {
				// The identifier is only needed for this log message.
				$id = $item->getId() ?? $item->getPermalink();
				Logger::debug( "Item {$id} does not have a unique title" );
				continue;
			}

			$fItems[] = $item;
		}

		return $fItems;
	}

	/**
	 * Re-reads the feed with a larger page size when the duplicate-title filter removed items.
	 *
	 * Nothing is loaded when the source's import limit is 0 or when unique titles
	 * are not required. Otherwise, for as long as fewer items remain than were
	 * originally read, the feed is read again (at most 5 times) and the result is
	 * filtered for duplicate titles again. When `$num` is set, every attempt asks
	 * for `$num` plus the current shortfall plus the attempt number; otherwise
	 * null is passed as the number of items. The loop also stops when a read
	 * fails or when the feed's total does not exceed the original item count.
	 *
	 * @param Source        $source The feed source.
	 * @param list<RssItem> $items The items that remain after the duplicate filter.
	 * @param int           $items_count_before_dup Item count before the duplicate filter.
	 * @param bool          $all Passed to the duplicate filter as its preview flag.
	 * @param string|null   $pid Optional ID of the progress to update.
	 * @param int|null      $num The maximum number of items to fetch.
	 * @param int|null      $page The page number to fetch.
	 * @return list<RssItem> The items of the last successful read, or the given items if nothing was re-read.
	 */
	protected function maybeLoadMoreItems( Source $source, array $items, int $items_count_before_dup, bool $all = false, ?string $pid = null, ?int $num = null, ?int $page = 1 ): array {
		if ( 0 === $source->settings->importLimit || ! $source->settings->uniqueTitles ) {
			return $items;
		}

		$wanted = (int) $items_count_before_dup;
		$kept = count( $items );

		for ( $attempt = 1; $kept < $wanted && $attempt <= 5; $attempt++ ) {
			$shortfall = $items_count_before_dup - $kept;
			$fetchSize = $num ? $num + $shortfall + $attempt : null;

			$reread = $this->read( $source->url, $fetchSize, $page, $pid );

			if ( $reread->isErr() ) {
				// Keep whatever was collected so far.
				break;
			}

			[$fresh, $feedTotal] = $reread->get();

			$items = $this->filterDuplicateItems( $source, Arrays::fromIterable( $fresh ), $all );
			$kept = count( $items );

			if ( $kept >= $wanted || $feedTotal <= $items_count_before_dup ) {
				break;
			}
		}

		return $items;
	}

	/**
	 * Trims the list of items to the age and count limits of the source.
	 *
	 * The age limit is applied first: when it is greater than zero and can be
	 * parsed, items without a date and items older than the limit are removed
	 * (an item's date is its creation date, falling back to its modification
	 * date). The import limit is applied afterwards, so that the count limit
	 * only ever selects from items that are recent enough. Applying the count
	 * limit first could select nothing but too-old items when the items are
	 * ordered oldest first, leaving no items even though recent ones exist.
	 *
	 * @param  Source        $source The feed source.
	 * @param  list<RssItem> $items  The items to trim.
	 * @return list<RssItem> The trimmed items.
	 */
	protected function limitItems( Source $source, array $items ): array {
		$ageLimit = $source->settings->ageLimit;
		$ageUnit = $source->settings->ageLimitUnit;

		if ( $ageLimit > 0 ) {
			$minDate = strtotime( "- $ageLimit $ageUnit" );

			// An unparsable age limit is ignored rather than dropping every item.
			if ( $minDate !== false ) {
				$recent = array();

				foreach ( $items as $item ) {
					/** @var RssItem $item */
					$date = $item->getDateCreated() ?? $item->getDateModified();

					if ( $date !== null && $date->getTimestamp() >= $minDate ) {
						$recent[] = $item;
					}
				}

				$items = $recent;
			}
		}

		$limit = $source->settings->importLimit;

		if ( $limit > 0 && count( $items ) > $limit ) {
			Logger::debug( "Applying limit of {$limit} items" );
			$items = array_slice( $items, 0, $limit );
		}

		return $items;
	}

	/**
	 * Gets a map of the matching existing items for a list of incoming items.
	 *
	 * The IDs of the incoming items (items without an ID are skipped) are looked
	 * up as GUIDs in the posts store. A failed lookup is logged as an error and
	 * treated as "no existing items". The resulting map is passed through the
	 * `wpra.importer.existingItemsMap` filter before being returned.
	 *
	 * @param list<RssItem> $items The incoming items.
	 * @return array<string,IrPost> The existing items, keyed by their GUIDs.
	 */
	protected function getExistingItemsMap( array $items ): array {
		/** @var list<string> $guids */
		$guids = Arrays::map(
			$items,
			function ( RssItem $item ) {
				$guid = $item->getId();

				return $guid ?? Arrays::skip();
			}
		);

		$lookup = $this->wpPosts->getManyByGuids( $guids );

		if ( $lookup->isErr() ) {
			Logger::error( $lookup->error() );
		}

		$byGuid = array();

		foreach ( $lookup->getOr( array() ) as $existing ) {
			$byGuid[ $existing->guid ] = $existing;
		}

		return apply_filters( 'wpra.importer.existingItemsMap', $byGuid, $guids, $items );
	}
}