Complete guide to combining queries with UNION, INTERSECT, and EXCEPT in Relica
- Introduction
- UNION Operations
- UNION ALL Operations
- INTERSECT Operations
- EXCEPT Operations
- Chaining Set Operations
- When to Use Set Operations
- Performance Considerations
- Database Compatibility
- Best Practices
- Common Patterns
- Troubleshooting
Set operations combine results from two or more SELECT queries. Think of them as mathematical set operations (∪ union, ∩ intersection, − difference) applied to database result sets.
Key Benefits:
- Combine data from multiple tables with similar structure
- Remove duplicates automatically (UNION)
- Find common records (INTERSECT)
- Find differences between datasets (EXCEPT)
- Simplify complex OR conditions
UNION combines results from multiple queries and removes duplicates.
package main
import (
"github.com/coregx/relica"
)
// Get all unique names from both active and archived users
func GetAllUserNames(db *relica.DB) ([]string, error) {
q1 := db.Select("name").
From("users").
Where("status = ?", 1)
q2 := db.Select("name").
From("archived_users").
Where("status = ?", 1)
var names []string
query := q1.Union(q2).Build()
err := query.All(&names)
return names, err
}Generated SQL (PostgreSQL):
SELECT "name" FROM "users" WHERE status = $1
UNION
SELECT "name" FROM "archived_users" WHERE status = $2Result: Unique names from both tables (duplicates removed).
// Combine customer emails from multiple sources
func GetAllCustomerEmails(db *relica.DB) ([]string, error) {
customers := db.Select("email").
From("customers").
Where("active = ?", true)
subscribers := db.Select("email").
From("newsletter_subscribers").
Where("subscribed = ?", true)
partners := db.Select("contact_email").
From("business_partners")
var emails []string
query := customers.Union(subscribers).Union(partners).Build()
err := query.All(&emails)
return emails, err
}Generated SQL:
SELECT "email" FROM "customers" WHERE active = $1
UNION
SELECT "email" FROM "newsletter_subscribers" WHERE subscribed = $2
UNION
SELECT "contact_email" FROM "business_partners"// Get top 10 unique product names from current and legacy catalogs
func GetTopProductNames(db *relica.DB) ([]Product, error) {
type Product struct {
Name string `db:"name"`
Price float64 `db:"price"`
}
current := db.Select("name", "price").
From("products").
Where("active = ?", true)
legacy := db.Select("name", "price").
From("legacy_products").
Where("available = ?", true)
var products []Product
query := current.Union(legacy).
OrderBy("name").
Limit(10).
Build()
err := query.All(&products)
return products, err
}Generated SQL:
SELECT "name", "price" FROM "products" WHERE active = $1
UNION
SELECT "name", "price" FROM "legacy_products" WHERE available = $2
ORDER BY "name"
LIMIT 10💡 Tip: ORDER BY and LIMIT apply to the entire combined result, not individual queries.
✅ Use UNION when:
- Need unique results from multiple sources
- Data structure is similar across tables
- Want to remove duplicates
- Combining historical and current data
- Merging partitioned tables
❌ Use UNION ALL instead when:
- Performance is critical (UNION ALL is faster)
- Duplicates are impossible or desired
- Large result sets (duplicate removal is expensive)
UNION ALL combines results and keeps all duplicates. Much faster than UNION.
// Get all order IDs from 2023 and 2024 (including duplicates if any)
func GetAllOrderIDs(db *relica.DB) ([]int, error) {
orders2023 := db.Select("id").
From("orders_2023")
orders2024 := db.Select("id").
From("orders_2024")
var ids []int
query := orders2023.UnionAll(orders2024).Build()
err := query.All(&ids)
return ids, err
}Generated SQL:
SELECT "id" FROM "orders_2023"
UNION ALL
SELECT "id" FROM "orders_2024"Result: All IDs from both tables, duplicates included.
// Query across time-partitioned tables
func GetOrdersDateRange(db *relica.DB, startDate, endDate time.Time) ([]Order, error) {
q2023 := db.Select().
From("orders_2023").
Where("created_at BETWEEN ? AND ?", startDate, endDate)
q2024 := db.Select().
From("orders_2024").
Where("created_at BETWEEN ? AND ?", startDate, endDate)
q2025 := db.Select().
From("orders_2025").
Where("created_at BETWEEN ? AND ?", startDate, endDate)
var orders []Order
query := q2023.UnionAll(q2024).UnionAll(q2025).Build()
err := query.All(&orders)
return orders, err
}Performance: 3-5x faster than UNION for large datasets.
// Total sales across all regions
func GetTotalSalesByRegion(db *relica.DB) ([]RegionSales, error) {
type RegionSales struct {
Region string `db:"region"`
Total float64 `db:"total"`
}
north := db.Select("'North' as region", "SUM(amount) as total").
From("sales_north")
south := db.Select("'South' as region", "SUM(amount) as total").
From("sales_south")
east := db.Select("'East' as region", "SUM(amount) as total").
From("sales_east")
west := db.Select("'West' as region", "SUM(amount) as total").
From("sales_west")
var sales []RegionSales
query := north.UnionAll(south).UnionAll(east).UnionAll(west).Build()
err := query.All(&sales)
return sales, err
}Benchmark (1M rows each table):
| Operation | Time | Memory | Notes |
|---|---|---|---|
| UNION | ~3500ms | High | Sorts + deduplicates |
| UNION ALL | ~800ms | Low | Direct concatenation |
Speedup: UNION ALL is 3-5x faster for large datasets.
✅ Use UNION ALL when:
- Performance is important
- Duplicates are impossible (different partitions)
- Duplicates don't matter
- Combining disjoint datasets
- Temporary intermediate results
❌ Use UNION instead when:
- Must guarantee unique results
- Small result sets (performance difference negligible)
- Business logic requires duplicate removal
INTERSECT returns only rows that appear in both queries (set intersection ∩).
Database Support:
- PostgreSQL: ✓ All versions
- MySQL: ✓ 8.0.31+ only
- SQLite: ✓ 3.25+
// Find users who are both premium members AND forum participants
func GetEngagedPremiumUsers(db *relica.DB) ([]int, error) {
premiumMembers := db.Select("user_id").
From("premium_memberships").
Where("active = ?", true)
forumParticipants := db.Select("user_id").
From("forum_posts").
GroupBy("user_id").
Having("COUNT(*) >= ?", 10)
var userIDs []int
query := premiumMembers.Intersect(forumParticipants).Build()
err := query.All(&userIDs)
return userIDs, err
}Generated SQL:
SELECT "user_id" FROM "premium_memberships" WHERE active = $1
INTERSECT
SELECT "user_id" FROM "forum_posts" GROUP BY "user_id" HAVING COUNT(*) >= $2Result: Only user IDs present in both result sets.
// Find products sold in both stores
func GetCommonProducts(db *relica.DB) ([]Product, error) {
store1Products := db.Select("product_id", "name").
From("store1_inventory").
Where("stock > ?", 0)
store2Products := db.Select("product_id", "name").
From("store2_inventory").
Where("stock > ?", 0)
var products []Product
query := store1Products.Intersect(store2Products).Build()
err := query.All(&products)
return products, err
}✅ Use INTERSECT when:
- Finding records present in multiple datasets
- Validating data consistency
- Finding overlapping categories
- Business logic requires "AND" across tables
❌ Use INNER JOIN instead when:
- Need columns from both sides
- More complex join conditions
- Better performance (often faster than INTERSECT)
- MySQL < 8.0.31 (no INTERSECT support)
Alternative with EXISTS:
// Equivalent to INTERSECT using EXISTS (works on all databases)
db.Select("user_id").
From("premium_memberships").
Where("active = ?", true).
Where(relica.Exists(
db.Select("1").
From("forum_posts").
Where("forum_posts.user_id = premium_memberships.user_id").
GroupBy("user_id").
Having("COUNT(*) >= ?", 10),
))EXCEPT returns rows from first query that are not in second query (set difference −).
Database Support:
- PostgreSQL: ✓ All versions
- MySQL: ✓ 8.0.31+ only
- SQLite: ✓ 3.25+
Note: SQL standard uses EXCEPT, but some databases call it MINUS (Oracle, older MySQL).
// Find users who registered but never placed an order
func GetInactiveUsers(db *relica.DB) ([]int, error) {
allUsers := db.Select("id").
From("users")
usersWithOrders := db.Select("user_id").
From("orders")
var inactiveUserIDs []int
query := allUsers.Except(usersWithOrders).Build()
err := query.All(&inactiveUserIDs)
return inactiveUserIDs, err
}Generated SQL:
SELECT "id" FROM "users"
EXCEPT
SELECT "user_id" FROM "orders"Result: User IDs from users table that don't appear in orders table.
// Find products in inventory but not in active orders
func GetUnorderedProducts(db *relica.DB) ([]Product, error) {
inventory := db.Select("product_id", "name").
From("inventory").
Where("stock > ?", 0)
orderedProducts := db.Select("product_id", "name").
From("order_items oi").
InnerJoin("orders o", "oi.order_id = o.id").
Where("o.status IN (?, ?)", "pending", "processing")
var products []Product
query := inventory.Except(orderedProducts).Build()
err := query.All(&products)
return products, err
}// Find active users excluding banned and suspended users
func GetActiveNonBannedUsers(db *relica.DB) ([]int, error) {
allUsers := db.Select("id").
From("users").
Where("active = ?", true)
bannedUsers := db.Select("user_id").
From("banned_accounts")
suspendedUsers := db.Select("user_id").
From("suspended_accounts")
var userIDs []int
query := allUsers.Except(bannedUsers).Except(suspendedUsers).Build()
err := query.All(&userIDs)
return userIDs, err
}Generated SQL:
SELECT "id" FROM "users" WHERE active = $1
EXCEPT
SELECT "user_id" FROM "banned_accounts"
EXCEPT
SELECT "user_id" FROM "suspended_accounts"✅ Use EXCEPT when:
- Finding records in one set but not another
- Data validation (comparing expected vs actual)
- Excluding specific subsets
- Set difference logic
❌ Use NOT EXISTS instead when:
- Better performance needed
- MySQL < 8.0.31 (no EXCEPT support)
- More complex exclusion logic
- NULL-safe operations required
Alternative with NOT EXISTS:
// Equivalent to EXCEPT using NOT EXISTS (works on all databases)
db.Select("id").
From("users").
Where(relica.NotExists(
db.Select("1").
From("orders").
Where("orders.user_id = users.id"),
))You can chain multiple set operations to create complex queries.
// (Products from store1 UNION products from store2) EXCEPT discontinued products
func GetAvailableProducts(db *relica.DB) ([]int, error) {
store1 := db.Select("product_id").
From("store1_inventory")
store2 := db.Select("product_id").
From("store2_inventory")
discontinued := db.Select("id").
From("discontinued_products")
var productIDs []int
query := store1.Union(store2).Except(discontinued).Build()
err := query.All(&productIDs)
return productIDs, err
}Generated SQL:
SELECT "product_id" FROM "store1_inventory"
UNION
SELECT "product_id" FROM "store2_inventory"
EXCEPT
SELECT "id" FROM "discontinued_products"// Users active in forums OR purchased recently, AND have premium membership
func GetEngagedPremiumUsers(db *relica.DB, since time.Time) ([]int, error) {
forumUsers := db.Select("user_id").
From("forum_posts").
Where("created_at > ?", since).
GroupBy("user_id")
buyers := db.Select("user_id").
From("orders").
Where("created_at > ?", since).
GroupBy("user_id")
premiumMembers := db.Select("user_id").
From("premium_memberships").
Where("active = ?", true)
var userIDs []int
// (forum users UNION buyers) INTERSECT premium members
query := forumUsers.Union(buyers).Intersect(premiumMembers).Build()
err := query.All(&userIDs)
return userIDs, err
}Evaluation Order: Left to right (like math operations without parentheses).
SQL set operations have equal precedence and evaluate left-to-right:
A UNION B INTERSECT C EXCEPT D
-- Evaluates as: ((A UNION B) INTERSECT C) EXCEPT D💡 Tip: Use subqueries for complex precedence control:
// Force evaluation order with subqueries
unionResult := q1.Union(q2)
intersectResult := unionResult.Intersect(q3)
finalResult := intersectResult.Except(q4)Need to combine query results?
├─ Remove duplicates? → UNION
├─ Keep duplicates? → UNION ALL (faster)
├─ Find common records?
│ ├─ Simple match → INTERSECT
│ └─ Need joined columns → INNER JOIN
└─ Find differences?
├─ Simple exclusion → EXCEPT
└─ Complex logic → NOT EXISTS
UNION:
- Merging similar tables (current + archived)
- Combining multiple data sources
- Deduplicating across datasets
- Historical + current data
UNION ALL:
- Time-partitioned tables (guaranteed no duplicates)
- Performance-critical queries
- Temporary result sets
- Log aggregation
INTERSECT:
- Finding overlaps
- Data validation
- "AND" logic across tables
- Set membership tests
EXCEPT:
- Finding missing records
- Exclusion logic
- Data diff operations
- "NOT IN" alternative
| Scenario | Set Operation | Alternative | Recommendation |
|---|---|---|---|
| Combine unique records | UNION | DISTINCT + UNION ALL | UNION (clearer) |
| Find common records | INTERSECT | INNER JOIN | JOIN (need columns) |
| Exclude records | EXCEPT | NOT EXISTS | NOT EXISTS (faster) |
| Combine partitions | UNION ALL | — | UNION ALL (best) |
Benchmark (1M rows × 2 tables):
| Operation | CPU Time | Memory | I/O |
|---|---|---|---|
| UNION | ~3500ms | 450 MB | High (sorts) |
| UNION ALL | ~800ms | 120 MB | Low |
Why UNION ALL is faster:
- No deduplication (no sort/hash)
- Streaming results (no buffering)
- Lower memory usage
Use UNION ALL when possible.
Rule: All queries in set operation must have:
- Same number of columns
- Compatible column types
- Same column order
// ❌ BAD: Column count mismatch
q1 := db.Select("id", "name").From("users")
q2 := db.Select("id").From("archived_users")
q1.Union(q2) // Error: column count mismatch
// ✅ GOOD: Same columns
q1 := db.Select("id", "name").From("users")
q2 := db.Select("id", "name").From("archived_users")
q1.Union(q2) // OKIndex the WHERE clauses in each query:
-- Index columns used in WHERE clauses
CREATE INDEX idx_users_status ON users(status);
CREATE INDEX idx_archived_status ON archived_users(status);Impact: 10-100x faster for filtered queries.
Apply to final result only:
// ORDER BY applies to combined result
query := q1.Union(q2).
OrderBy("name"). // Sorts entire UNION result
Limit(10) // Top 10 from combined resultPerformance: Sorting happens after set operation (can be slow for large results).
Modern databases can parallelize set operations:
- PostgreSQL 9.6+: Parallel UNION/UNION ALL
- MySQL 8.0.14+: Parallel set operations
- SQLite: Single-threaded
Benefit: 2-4x speedup on multi-core systems.
| Operation | PostgreSQL | MySQL 5.7 | MySQL 8.0.0-30 | MySQL 8.0.31+ | SQLite 3.25+ |
|---|---|---|---|---|---|
| UNION | ✓ All | ✓ | ✓ | ✓ | ✓ |
| UNION ALL | ✓ All | ✓ | ✓ | ✓ | ✓ |
| INTERSECT | ✓ All | ❌ | ❌ | ✓ | ✓ |
| EXCEPT | ✓ All | ❌ | ❌ | ✓ | ✓ |
| ORDER BY | ✓ | ✓ | ✓ | ✓ | ✓ |
| LIMIT | ✓ | ✓ | ✓ | ✓ | ✓ |
MySQL Version Notes:
- MySQL 5.7: UNION and UNION ALL only
- MySQL 8.0.0-8.0.30: UNION and UNION ALL only
- MySQL 8.0.31+: Full support (INTERSECT, EXCEPT added)
- MySQL workarounds: Use JOIN/EXISTS for older versions
Workaround for MySQL < 8.0.31:
// INTERSECT workaround using EXISTS
func intersectWorkaround(db *relica.DB) {
// Instead of: q1.Intersect(q2)
// Use: q1 WHERE EXISTS (SELECT ... FROM q2)
q1 := db.Select("user_id").
From("premium_memberships").
Where(relica.Exists(
db.Select("1").
From("forum_posts").
Where("forum_posts.user_id = premium_memberships.user_id"),
))
}
// EXCEPT workaround using NOT EXISTS
func exceptWorkaround(db *relica.DB) {
// Instead of: q1.Except(q2)
// Use: q1 WHERE NOT EXISTS (SELECT ... FROM q2)
q1 := db.Select("id").
From("users").
Where(relica.NotExists(
db.Select("1").
From("orders").
Where("orders.user_id = users.id"),
))
}-
Use UNION ALL when duplicates don't matter (3-5x faster)
q1.UnionAll(q2) // Faster than Union(q2)
-
Match column count and types
q1 := db.Select("id", "name").From("users") q2 := db.Select("id", "name").From("archived_users")
-
Apply ORDER BY to final result
q1.Union(q2).OrderBy("name")
-
Use meaningful column aliases
q1 := db.Select("'active' as source", "id", "name").From("users") q2 := db.Select("'archived' as source", "id", "name").From("archived_users")
-
Index WHERE clause columns
CREATE INDEX idx_status ON users(status);
-
Don't use UNION when UNION ALL works (unnecessary overhead)
// ❌ Bad: Slow q1.Union(q2) // Removes duplicates you don't need // ✅ Good: Fast q1.UnionAll(q2)
-
Don't mix incompatible column types
// ❌ Bad: Type mismatch q1 := db.Select("id", "created_at").From("users") q2 := db.Select("id", "name").From("archived") // String vs timestamp
-
Don't use set operations when JOIN is clearer
// ❌ Bad: Unclear intent INTERSECT for simple matching // ✅ Good: Clear intent INNER JOIN
-
Don't forget column aliases for literals
// ❌ Bad: Unclear column name Select("'North'") // ✅ Good: Named column Select("'North' as region")
-
Don't rely on column position (use explicit aliases)
// ❌ Bad: Fragile Select("name", "email") // What if column order changes? // ✅ Good: Explicit Select("name as user_name", "email as user_email")
// Combine current orders with archived orders
func GetAllOrders(db *relica.DB, userID int) ([]Order, error) {
current := db.Select().
From("orders").
Where("user_id = ?", userID)
archived := db.Select().
From("orders_archive").
Where("user_id = ?", userID)
var orders []Order
query := current.UnionAll(archived).
OrderBy("created_at DESC").
Build()
err := query.All(&orders)
return orders, err
}// Get unique email addresses from multiple sources
func GetAllUniqueEmails(db *relica.DB) ([]string, error) {
customers := db.Select("email").From("customers")
subscribers := db.Select("email").From("newsletter")
partners := db.Select("email").From("partners")
var emails []string
query := customers.Union(subscribers).Union(partners).Build()
err := query.All(&emails)
return emails, err
}// Query time-partitioned tables
func GetOrdersByDateRange(db *relica.DB, start, end time.Time) ([]Order, error) {
var queries []*relica.SelectQuery
// Determine which partitions to query
years := []int{2023, 2024, 2025}
for _, year := range years {
q := db.Select().
From(fmt.Sprintf("orders_%d", year)).
Where("created_at BETWEEN ? AND ?", start, end)
queries = append(queries, q)
}
// Combine all partitions with UNION ALL
result := queries[0]
for i := 1; i < len(queries); i++ {
result = result.UnionAll(queries[i])
}
var orders []Order
query := result.OrderBy("created_at DESC").Build()
err := query.All(&orders)
return orders, err
}// Find VIP customers: (high spenders OR frequent buyers) AND active
func GetVIPCustomers(db *relica.DB) ([]int, error) {
highSpenders := db.Select("user_id").
From("orders").
GroupBy("user_id").
Having("SUM(total) > ?", 10000)
frequentBuyers := db.Select("user_id").
From("orders").
GroupBy("user_id").
Having("COUNT(*) >= ?", 50)
activeUsers := db.Select("id").
From("users").
Where("active = ? AND last_login > ?", true, time.Now().AddDate(0, -1, 0))
var vipIDs []int
query := highSpenders.Union(frequentBuyers).Intersect(activeUsers).Build()
err := query.All(&vipIDs)
return vipIDs, err
}// Find orphaned order items (items without valid orders)
func FindOrphanedItems(db *relica.DB) ([]int, error) {
allItemOrderIDs := db.Select("order_id").
From("order_items").
GroupBy("order_id")
validOrderIDs := db.Select("id").
From("orders")
var orphanedIDs []int
query := allItemOrderIDs.Except(validOrderIDs).Build()
err := query.All(&orphanedIDs)
return orphanedIDs, err
}Problem: Queries have different number of columns
// ❌ ERROR: The used SELECT statements have a different number of columns
q1 := db.Select("id", "name").From("users")
q2 := db.Select("id").From("archived")
q1.Union(q2)Solution: Match column counts
// ✅ GOOD: Same column count
q1 := db.Select("id", "name").From("users")
q2 := db.Select("id", "NULL as name").From("archived") // Add placeholder
q1.Union(q2)Problem: Column types are incompatible
// ❌ ERROR: Types don't match (INT vs VARCHAR)
q1 := db.Select("id").From("users") // INT
q2 := db.Select("name").From("archived") // VARCHARSolution: Cast types
// ✅ GOOD: Cast to compatible type
q1 := db.Select("CAST(id AS VARCHAR)").From("users")
q2 := db.Select("name").From("archived")Problem: MySQL < 8.0.31 doesn't support INTERSECT/EXCEPT
// ❌ ERROR: You have an error in your SQL syntax (MySQL 8.0.30)
q1.Intersect(q2)Solution: Use JOIN or EXISTS
// ✅ GOOD: INTERSECT alternative using EXISTS
q1 := db.Select("user_id").
From("premium_memberships").
Where(relica.Exists(
db.Select("1").
From("forum_posts").
Where("forum_posts.user_id = premium_memberships.user_id"),
))
// ✅ GOOD: EXCEPT alternative using NOT EXISTS
q1 := db.Select("id").
From("users").
Where(relica.NotExists(
db.Select("1").
From("orders").
Where("orders.user_id = users.id"),
))Problem: UNION is slow on large datasets
// ❌ SLOW: Deduplication overhead
q1.Union(q2) // 3500ms for 2M rowsSolution: Use UNION ALL if duplicates don't matter
// ✅ FAST: No deduplication
q1.UnionAll(q2) // 800ms for 2M rows (4.3x faster)Problem: ORDER BY in individual queries ignored
// ❌ ORDER BY in q1 is ignored
q1 := db.Select("name").From("users").OrderBy("name")
q2 := db.Select("name").From("archived")
q1.Union(q2) // q1's ORDER BY has no effectSolution: Apply ORDER BY to final result
// ✅ GOOD: ORDER BY on union result
q1 := db.Select("name").From("users")
q2 := db.Select("name").From("archived")
q1.Union(q2).OrderBy("name")Complete working examples available in Relica repository:
examples/set_operations/union.go- UNION examplesexamples/set_operations/union_all.go- UNION ALL patternsexamples/set_operations/intersect.go- INTERSECT examplesexamples/set_operations/except.go- EXCEPT patternsexamples/set_operations/chaining.go- Complex chaining
- Subquery Guide - Alternative to set operations for filtering
- CTE Guide - Simplify complex set operations with CTEs
- Performance Tuning - Optimize query performance
Last Updated: 2025-11-24 Minimum Go Version: 1.25+