问题
I have an index: CREATE INDEX index_c_profiles_on_city_state_name_domain ON
c_profiles ((data->>'state'), (data->>'city'), name, domain);
and another index: CREATE INDEX index_c_profiles_on_state ON
c_profiles (state)
Here, 'state' is a new column I created, holding the value of data->>'state' copied over manually from the jsonb column for each row.
I tried this query to use the first index: SELECT mm.name, mm.domain, mm.data ->> 'city' as city, mm.data ->> 'state' as state
FROM c_profiles as mm
WHERE ((mm.data ->> 'state') = 'CA')
and it took 8 seconds to complete
[
{
"Execution Time": 8023.687,
"Planning Time": 0.5,
"Plan": {
"Exact Heap Blocks": 16743,
"Node Type": "Bitmap Heap Scan",
"Actual Total Time": 8014.769,
"Shared Hit Blocks": 2523,
"Schema": "public",
"Plans": [
{
"Node Type": "Bitmap Index Scan",
"Actual Total Time": 94.311,
"Shared Hit Blocks": 3,
"Shared Read Blocks": 350,
"Temp Written Blocks": 0,
"Local Dirtied Blocks": 0,
"Local Hit Blocks": 0,
"Plan Width": 0,
"Actual Loops": 1,
"Actual Startup Time": 94.311,
"Temp Read Blocks": 0,
"Local Read Blocks": 0,
"Index Name": "index_c_profiles_on_city_state_name_domain",
"Startup Cost": 0,
"Shared Dirtied Blocks": 0,
"Shared Written Blocks": 0,
"Local Written Blocks": 0,
"Plan Rows": 21870,
"Index Cond": "((mm.data ->> 'state'::text) = 'CA'::text)",
"Actual Rows": 21729,
"Parent Relationship": "Outer",
"Total Cost": 1604.45
}
],
"Shared Read Blocks": 17472,
"Relation Name": "c_profiles",
"Local Hit Blocks": 0,
"Local Dirtied Blocks": 0,
"Temp Written Blocks": 0,
"Plan Width": 980,
"Actual Loops": 1,
"Rows Removed by Index Recheck": 0,
"Lossy Heap Blocks": 0,
"Alias": "mm",
"Recheck Cond": "((mm.data ->> 'state'::text) = 'CA'::text)",
"Temp Read Blocks": 0,
"Output": [
"(data ->> 'city'::text)",
"(data ->> 'state'::text)"
],
"Actual Startup Time": 99.44,
"Local Read Blocks": 0,
"Startup Cost": 1609.91,
"Shared Dirtied Blocks": 360,
"Shared Written Blocks": 0,
"Local Written Blocks": 0,
"Plan Rows": 21870,
"Actual Rows": 21537,
"Total Cost": 42388.85
},
"Triggers": []
}
]
Then, I tried this query to use the second index: SELECT state
FROM c_profiles as mm
WHERE state = 'CA'
and it took 20 milliseconds to complete
[
{
"Execution Time": 20.19,
"Planning Time": 0.5,
"Plan": {
"Node Type": "Index Only Scan",
"Actual Total Time": 16.206,
"Shared Hit Blocks": 633,
"Schema": "public",
"Scan Direction": "Forward",
"Shared Read Blocks": 59,
"Relation Name": "c_profiles",
"Local Hit Blocks": 0,
"Heap Fetches": 776,
"Local Dirtied Blocks": 0,
"Temp Written Blocks": 0,
"Plan Width": 3,
"Actual Loops": 1,
"Rows Removed by Index Recheck": 0,
"Actual Startup Time": 0.08,
"Alias": "mm",
"Temp Read Blocks": 0,
"Output": [
"state"
],
"Local Read Blocks": 0,
"Index Name": "index_c_profiles_on_state",
"Startup Cost": 0.42,
"Shared Dirtied Blocks": 53,
"Shared Written Blocks": 0,
"Local Written Blocks": 0,
"Plan Rows": 21870,
"Index Cond": "(mm.state = 'CA'::text)",
"Actual Rows": 21524,
"Total Cost": 639.12
},
"Triggers": []
}
]
Since both queries match the same rows, why does the JSON index perform so poorly? Since the data and row count are the same, it would seem that there must be a way to achieve similar results with a JSON index, but it is unclear to me how. Any help would be appreciated!
来源:https://stackoverflow.com/questions/51432884/json-index-much-slower-than-index-on-text-column-with-same-value