# PostgreSQL 教程: 理解索引扫描的成本估算

## PostgreSQL 索引扫描

``````CREATE TABLE events (event_id char(7) PRIMARY KEY, occurred_at timestamptz);

INSERT INTO events (event_id, occurred_at)
SELECT to_hex(i),
TIMESTAMP '2024-01-01 00:00:00+08' + floor(random() * 100000)::int * '1 minutes'::interval
FROM generate_series(x'1000000'::int, x'2000000'::int, 16) AS s(i);

ANALYZE events;
``````

``````EXPLAIN SELECT * FROM events
WHERE event_id = '19AC6C0' AND occurred_at = '2024-01-06 12:00:00+08';
QUERY PLAN
------------------------------------------------------------------------------
Index Scan using events_pkey on events  (cost=0.42..8.45 rows=1 width=16)
Index Cond: (event_id = '19AC6C0'::bpchar)
Filter: (occurred_at = '2024-01-06 04:00:00+00'::timestamp with time zone)
(3 rows)
``````

``````EXPLAIN SELECT * FROM events WHERE ctid = '(0,1)'::tid;
QUERY PLAN
-------------------------------------------------------
Tid Scan on events  (cost=0.00..4.01 rows=1 width=16)
TID Cond: (ctid = '(0,1)'::tid)
(2 rows)
``````

## 成本估算

### 相关性高的好处

``````SELECT attname, correlation
FROM pg_stats WHERE tablename = 'events'
ORDER BY abs(correlation) DESC;
attname   | correlation
-------------+-------------
event_id    |           1
occurred_at |  0.00227215
(2 rows)
``````

``````EXPLAIN SELECT * FROM events WHERE event_id < '1100000';
QUERY PLAN
----------------------------------------------------------------------------------
Index Scan using events_pkey on events  (cost=0.42..2358.77 rows=73391 width=16)
Index Cond: (event_id < '1100000'::bpchar)
(2 rows)
``````

``````SELECT round(73391::numeric/reltuples::numeric, 4)
FROM pg_class WHERE relname = 'events';
round
--------
0.0700
(1 row)
``````

``````WITH costs(idx_cost, tbl_cost) AS (
SELECT
( SELECT round(
current_setting('random_page_cost')::real * pages +
current_setting('cpu_index_tuple_cost')::real * tuples +
current_setting('cpu_operator_cost')::real * tuples
)
FROM (
SELECT relpages * 0.0700 AS pages, reltuples * 0.0700 AS tuples
FROM pg_class WHERE relname = 'events_pkey'
) c
),
( SELECT round(
current_setting('seq_page_cost')::real * pages +
current_setting('cpu_tuple_cost')::real * tuples
)
FROM (
SELECT relpages * 0.0700 AS pages, reltuples * 0.0700 AS tuples
FROM pg_class WHERE relname = 'events'
) c
)
)
SELECT idx_cost, tbl_cost, idx_cost + tbl_cost AS total FROM costs;
idx_cost | tbl_cost | total
----------+----------+-------
1364 |      989 |  2353
(1 row)
``````

### 相关性低的坏处

``````CREATE INDEX ON events(occurred_at);
SET enable_seqscan = off;
SET enable_bitmapscan = off;

EXPLAIN SELECT * FROM events
WHERE occurred_at < '2024-01-06 12:00:00+08';
QUERY PLAN
----------------------------------------------------------------------------------------------
Index Scan using events_occurred_at_idx on events  (cost=0.42..16812.36 rows=78629 width=16)
Index Cond: (occurred_at < '2024-01-06 04:00:00+00'::timestamp with time zone)
(2 rows)
``````

``````SET effective_cache_size = '8kB';

EXPLAIN SELECT * FROM events
WHERE occurred_at < '2024-01-06 12:00:00+08';
QUERY PLAN
-----------------------------------------------------------------------------------------------
Index Scan using events_occurred_at_idx on events  (cost=0.42..316678.81 rows=78629 width=16)
Index Cond: (occurred_at < '2024-01-06 04:00:00+00'::timestamp with time zone)
(2 rows)

RESET effective_cache_size;
RESET enable_seqscan;
RESET enable_bitmapscan;
``````

PostgreSQL 优化