Skip to content
Open
4 changes: 4 additions & 0 deletions projects/packages/sync/changelog/posts-checksum-in-allowlist
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Significance: patch
Type: changed

Sync: Use post_type IN (allowed types) instead of NOT IN (blacklist) for the posts checksum query, for improved performance.
48 changes: 43 additions & 5 deletions projects/packages/sync/src/class-settings.php
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,16 @@ class Settings {
*/
public static $is_doing_cron;

/**
* Per-request cache for allowed post types (checksum). Cleared when post_types_blacklist is updated.
*
* @access private
* @static
*
* @var array|null
*/
private static $cached_allowed_post_types_for_checksum = null;

/**
* Whether we're currently syncing.
*
Expand Down Expand Up @@ -293,6 +303,11 @@ public static function update_settings( $new_settings ) {
$updated = update_option( self::SETTINGS_OPTION_PREFIX . $setting, $value, true );
}

// Invalidate allowlist cache when post types blacklist changes.
if ( 'post_types_blacklist' === $setting ) {
self::$cached_allowed_post_types_for_checksum = null;
}

// If we set the disabled option to true, clear the queues.
if ( ( 'disable' === $setting || 'network_disable' === $setting ) && (bool) $value ) {
$listener = Listener::get_instance();
Expand Down Expand Up @@ -338,16 +353,19 @@ public static function is_network_setting( $setting ) {
}

/**
* Returns escaped SQL for blacklisted post types.
* Can be injected directly into a WHERE clause.
* Returns escaped SQL for allowed post types (all registered minus blacklist).
*
* @access public
* @static
*
* @return string SQL WHERE clause.
* @return string SQL WHERE clause (post_type IN).
*/
public static function get_blacklisted_post_types_sql() {
return 'post_type NOT IN (\'' . implode( '\', \'', array_map( 'esc_sql', static::get_setting( 'post_types_blacklist' ) ) ) . '\')';
public static function get_allowed_post_types_sql() {
$allowed = static::get_allowed_post_types_for_checksum();
if ( empty( $allowed ) ) {
return '1 = 0'; // This is an SQL condition that is always false.
}
return 'post_type IN (\'' . implode( '\', \'', array_map( 'esc_sql', $allowed ) ) . '\')';
Comment on lines +378 to +383
Copy link

Copilot AI Mar 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get_allowed_post_types_sql() relies on get_allowed_post_types_for_checksum(), but the helper’s name/doc comment imply it’s checksum-specific while it’s now also the source of truth for full-sync/range filtering. Consider introducing a more general helper name (and delegating the checksum-named method to it) to avoid confusion about intended scope.

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also valid.

}

/**
Expand All @@ -367,6 +385,25 @@ public static function get_disallowed_post_types_structured() {
);
}

/**
* Get allowed post types for the posts checksum (all registered minus blacklist).
* Used so the checksum query can use IN (allowed) instead of NOT IN (blacklist).
*
* Result is cached for the request to prevent unnecessary get_post_types() calls.
*
* @return array Allowed post type names (no DB query; get_post_types() is used).
*/
public static function get_allowed_post_types_for_checksum() {
if ( null !== self::$cached_allowed_post_types_for_checksum ) {
return self::$cached_allowed_post_types_for_checksum;
}
$all_types = get_post_types( array(), 'names' );
$blacklist = static::get_setting( 'post_types_blacklist' );
$allowed = array_diff( $all_types, $blacklist );
Comment on lines +404 to +417
Copy link

Copilot AI Mar 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

get_allowed_post_types_for_checksum() builds the allowlist from get_post_types(), which only returns registered post types. This changes behavior vs the prior post_type NOT IN ( blacklist ) approach: posts whose post_type exists in the DB but is no longer registered (e.g. plugin removed leaving orphan rows) will now be excluded from checksums/full-sync ranges and may never be repaired/synced. If that behavior change isn’t intended, consider deriving the allowlist from the DB’s distinct post_type values (then subtract the blacklist) or keeping the old NOT IN behavior for full-sync/range queries while using the allowlist only for the checksum path.

Suggested change
* Get allowed post types for the posts checksum (all registered minus blacklist).
* Used so the checksum query can use IN (allowed) instead of NOT IN (blacklist).
*
* Result is cached for the request to prevent unnecessary get_post_types() calls.
*
* @return array Allowed post type names (no DB query; get_post_types() is used).
*/
public static function get_allowed_post_types_for_checksum() {
if ( null !== self::$cached_allowed_post_types_for_checksum ) {
return self::$cached_allowed_post_types_for_checksum;
}
$all_types = get_post_types( array(), 'names' );
$blacklist = static::get_setting( 'post_types_blacklist' );
$allowed = array_diff( $all_types, $blacklist );
* Get allowed post types for the posts checksum (all post types in the DB minus blacklist).
* Used so the checksum query can use IN (allowed) instead of NOT IN (blacklist).
*
* Result is cached for the request to prevent unnecessary database calls.
*
* @return array Allowed post type names.
*/
public static function get_allowed_post_types_for_checksum() {
if ( null !== self::$cached_allowed_post_types_for_checksum ) {
return self::$cached_allowed_post_types_for_checksum;
}
global $wpdb;
// Fetch all distinct post_type values present in the posts table.
$all_types = $wpdb->get_col( "SELECT DISTINCT post_type FROM {$wpdb->posts}" );
if ( ! is_array( $all_types ) ) {
$all_types = array();
}
$blacklist = static::get_setting( 'post_types_blacklist' );
$allowed = array_diff( $all_types, $blacklist );

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is very valid.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

posts whose post_type exists in the DB but is no longer registered (e.g. plugin removed leaving orphan rows) will now be excluded from checksums/full-sync ranges

True, but I believe the current approach makes more sense actually :)

Copy link
Contributor Author

@coder-karen coder-karen Mar 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right. But I think we'd need to change how incremental sync syncs non-registered post types at the same time here. In testing, a non-registered post type will get synced in incremental sync and removed during a fix checksum, whereas on trunk it won't be removed.
Removing the non-existent posts (or rather, posts with no post type) from being synced via incremental sync would have a knock-on effect: there have been 56 million synced jetpack_sync_save_post actions in the last month where the post_status is jetpack_sync_non_registered_post_type, constituting approximately 44,000 sites. For what reason do we need to sync 'non-existent posts' anyway?

self::$cached_allowed_post_types_for_checksum = array_values( $allowed );
return self::$cached_allowed_post_types_for_checksum;
}

/**
* Returns escaped SQL for blacklisted taxonomies.
* Can be injected directly into a WHERE clause.
Expand Down Expand Up @@ -544,6 +581,7 @@ public static function reset_data() {
foreach ( $valid_settings as $option => $value ) {
delete_option( self::SETTINGS_OPTION_PREFIX . $option );
}
self::$cached_allowed_post_types_for_checksum = null;
self::set_importing( null );
self::set_doing_cron( null );
self::set_is_syncing( null );
Expand Down
2 changes: 1 addition & 1 deletion projects/packages/sync/src/modules/class-full-sync.php
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ public function get_range( $type ) {
case 'posts':
$table = $wpdb->posts;
$id = 'ID';
$where_sql = Settings::get_blacklisted_post_types_sql();
$where_sql = Settings::get_allowed_post_types_sql();

break;
case 'comments':
Expand Down
2 changes: 1 addition & 1 deletion projects/packages/sync/src/modules/class-posts.php
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ public function estimate_full_sync_actions( $config ) {
* @return string WHERE SQL clause, or `null` if no posts are specified in the module config.
*/
public function get_where_sql( $config ) {
$where_sql = Settings::get_blacklisted_post_types_sql();
$where_sql = Settings::get_allowed_post_types_sql();

// Config is a list of post IDs to sync.
if ( is_array( $config ) && ! empty( $config ) ) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,12 @@ protected static function get_default_tables() {
'range_field' => 'ID',
'key_fields' => array( 'ID' ),
'checksum_fields' => array( 'post_modified_gmt' ),
'filter_values' => Sync\Settings::get_disallowed_post_types_structured(),
'filter_values' => array(
'post_type' => array(
'operator' => 'IN',
'values' => Sync\Settings::get_allowed_post_types_for_checksum(),
),
),
'is_table_enabled_callback' => function () {
return false !== Sync\Modules::get_module( 'posts' );
},
Expand Down Expand Up @@ -528,6 +533,10 @@ protected function prepare_filter_values_as_sql( $filter_values = array(), $tabl
case 'IN':
case 'NOT IN':
$filter_values_count = is_countable( $filter['values'] ) ? count( $filter['values'] ) : 0;
if ( 0 === $filter_values_count ) {
$result[] = 'IN' === $filter['operator'] ? '1 = 0' : '1 = 1';
break;
}
Comment on lines 535 to +539
Copy link

Copilot AI Mar 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new empty-list handling for IN/NOT IN is good defensive behavior, but it’s not covered by the existing Table_Checksum tests. Please add a unit/integration test that exercises an empty values array and asserts the generated filter SQL is valid (e.g. 1 = 0/1 = 1) and that checksum calculation does not error.

Copilot uses AI. Check for mistakes.
$values_placeholders = implode( ',', array_fill( 0, $filter_values_count, '%s' ) );
$statement = "{$key} {$filter['operator']} ( $values_placeholders )";

Expand All @@ -543,7 +552,7 @@ protected function prepare_filter_values_as_sql( $filter_values = array(), $tabl
}

/**
* Build the filter query baased off range fields and values and the additional sql.
* Build the filter query based off range fields and values and the additional sql.
*
* @param int|null $range_from Start of the range.
* @param int|null $range_to End of the range.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Significance: patch
Type: other
Comment: Sync: Minor update to existing tests.


Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ public static function field_table_validation_provider() {
'range_field' => 'ID',
'key_fields' => array( 'ID' ),
'checksum_fields' => array( 'post_modified_gmt' ),
'filter_sql' => Settings::get_blacklisted_post_types_sql(),
'filter_sql' => Settings::get_allowed_post_types_sql(),
),
),
true,
Expand All @@ -277,7 +277,7 @@ public static function field_table_validation_provider() {
'range_field' => 'ID_2',
'key_fields' => array( 'ID' ),
'checksum_fields' => array( 'post_modified_gmt' ),
'filter_sql' => Settings::get_blacklisted_post_types_sql(),
'filter_sql' => Settings::get_allowed_post_types_sql(),
),
),
false,
Expand All @@ -290,7 +290,7 @@ public static function field_table_validation_provider() {
'range_field' => 'ID',
'key_fields' => array( 'ID' ),
'checksum_fields' => array( 'post_modified_gmt_2' ),
'filter_sql' => Settings::get_blacklisted_post_types_sql(),
'filter_sql' => Settings::get_allowed_post_types_sql(),
),
),
false,
Expand All @@ -303,7 +303,7 @@ public static function field_table_validation_provider() {
'range_field' => 'ID',
'key_fields' => array( 'ID_2' ),
'checksum_fields' => array( 'post_modified_gmt' ),
'filter_sql' => Settings::get_blacklisted_post_types_sql(),
'filter_sql' => Settings::get_allowed_post_types_sql(),
),
),
false,
Expand Down
Loading