I have the following huge query that contains repeated subqueries , It looks really inefficient to me. How can i optimize it ?
SELECT T2.date1, T2.date2, T2.period, T1.market, T1.ticker, 0 AS scenario
FROM
(SELECT DISTINCT
Q.market AS market,
Q.ticker AS ticker
FROM portfolio.scenario S RIGHT JOIN portfolio.quote Q
ON S.series = (SELECT S.series
FROM scenario S
WHERE S.date1 >= '2009-09-01' AND
S.date2 <= '2010-07-01' AND
S.period = 'QUARTER'
ORDER BY S.date2
LIMIT 1) AND
Q.market = S.market AND
Q.ticker = S.ticker
WHERE Q.date = '2010-07-01' AND
S.date1 IS NULL) AS T1
JOIN
(SELECT DISTINCT S.date1, S.date2, S.period
FROM scenario S
WHERE S.series = (SELECT S.series
FROM scenario S
WHERE S.date1 >= '2009-09-01' AND
S.date2 <= '2010-07-01' AND
S.period = 'QUARTER'
ORDER BY S.date2
LIMIT 1) AND
S.date1 >= '2009-09-01' AND
S.date2 <= '2010-07-01') AS T2
UNION
SELECT S.date1 AS date1,
S.date2 AS date2,
S.period AS period,
Q.market AS market,
Q.ticker AS ticker,
Q.close * EXP(S.ratio) AS scenario
FROM portfolio.scenario S , portfolio.quote Q
WHERE S.series = (SELECT S.series
FROM scenario S
WHERE S.date1 >= '2009-09-01' AND
S.date2 <= '2010-07-01' AND
S.period = 'QUARTER'
ORDER BY S.date2
LIMIT 1) AND
S.date1 >= '2009-09-01' AND
S.date2 <= '2010-07-01' AND
Q.date = '2010-07-01' AND
Q.market = S.market AND
Q.ticker = S.ticker
UNION
SELECT T2.date1, T2.date2, T2.period, T1.market, T1.ticker, 0 AS scenario
FROM
(SELECT DISTINCT
Q.market AS market,
Q.ticker AS ticker
FROM portfolio.scenario S , portfolio.quote Q
WHERE Q.date = '2010-07-01' AND
Q.market = S.market AND
Q.ticker = S.ticker AND
S.series = (SELECT S.series
FROM scenario S
WHERE S.date1 >= '2009-09-01' AND
S.date2 <= '2010-07-01' AND
S.period = 'QUARTER'
ORDER BY S.date2
LIMIT 1) AND
S.date1 >= '2009-09-01' AND
S.date2 <= '2010-07-01' ) AS T1
JOIN
(SELECT DISTINCT S.date1, S.date2, S.period
FROM scenario S
WHERE S.series = (SELECT S.series
FROM scenario S
WHERE S.date1 >= '2009-09-01' AND
S.date2 <= '2010-07-01' AND
S.period = 'QUARTER'
ORDER BY S.date2
LIMIT 1) AND
S.date1 >= '2009-09-01' AND
S.date2 <= '2010-07-01') AS T2
WHERE (T2.date1, T2.date2, T2.period, T1.market, T1.ticker)
NOT IN (SELECT S.date1 AS date1,
S.date2 AS date2,
S.period AS period,
Q.market AS market,
Q.ticker AS ticker
FROM portfolio.scenario S , portfolio.quote Q
WHERE Q.date = '2010-07-01' AND
Q.market = S.market AND
Q.ticker = S.ticker AND
S.series = (SELECT S.series
FROM scenario S
WHERE S.date1 >= '2009-09-01' AND
S.date2 <= '2010-07-01' AND
S.period = 'QUARTER'
ORDER BY S.date2
LIMIT 1) AND
S.date1 >= '2009-09-01' AND
S.date2 <= '2010-07-01' )
ORDER BY
date1,date2,period,market,ticker
After @Bruce 's comment and some logic to reduce a subquery my query now is:
(SELECT S.date1,
S.date2,
S.period,
Q.market,
Q.ticker,
Q.close * EXP(S.ratio) AS scenario
FROM portfolio.scenario S , portfolio.quote Q
WHERE
S.date1 >= (@date1 := '2009-09-01') AND
S.date2 <= (@date2 := '2010-07-01') AND
Q.date = (@qdate := '2010-07-01') AND
S.series =
(@series :=
(SELECT S.series
FROM scenario S
WHERE S.date1 >= '2009-09-01' AND
S.date2 <= '2010-07-01' AND
S.period = 'QUARTER'
ORDER BY S.date2
LIMIT 1)) AND
Q.market = S.market AND
Q.ticker = S.ticker)
UNION
(SELECT T2.date1, T2.date2, T2.period, T1.market, T1.ticker, 0 AS scenario
FROM
(SELECT Q.market, Q.ticker
FROM quote Q
WHERE Q.date = @qdate) AS T1
JOIN
(SELECT DISTINCT S.date1, S.date2, S.period
FROM scenario S
WHERE S.series = @series AND
S.date1 >= @date1 AND
S.date2 <= @date2) AS T2
WHERE (T2.date1, T2.date2, T2.period, T1.market, T1.ticker)
NOT IN
(SELECT S.date1,
S.date2,
S.period,
Q.market,
Q.ticker
FROM portfolio.scenario S , portfolio.quote Q
WHERE Q.date = @qdate AND
Q.market = S.market AND
Q.ticker = S.ticker AND
S.series = @series AND
S.date1 >= @date1 AND
S.date2 <= @date2 ))
However, If i changed
(@series :=
(SELECT S.series
FROM scenario S
WHERE S.date1 >= '2009-09-01' AND
S.date2 <= '2010-07-01' AND
S.period = 'QUARTER'
ORDER BY S.date2
LIMIT 1))
to be
(@series :=
(SELECT S.series
FROM scenario S
WHERE S.date1 >= @date1 AND
S.date2 <= @date2 AND
S.period = 'QUARTER'
ORDER BY S.date2
LIMIT 1))
It takes too much time to process it (i have executed the query 10 mins ago and still did not get the result) while the query normally returns in 5 seconds.
Also when i reset the variables , execute the result is not correct (probably use the variable's value from the previous execution). How can i change that without adding SET statements (I would like it to be a single query)
Use MySQL variables:
SELECT
@x := ColumnName,
@y := ColumnName2 + @z,
@z := (SELECT * FROM SubTable WHERE x = @x),
(SELECT * FROM Table2 WHERE X = @z),
(SELECT * FROM Table3 WHERE X = @z)
FROM Table
WHERE
v = @v
- You can assign subselect and column values to SQL variables
- You can refer to these variables anywhere in the statement
- The variables contain their value from previous rows (if set)
- You can reuse subselects and other values in this way
来源:https://stackoverflow.com/questions/4225982/how-to-optimize-huge-query-with-repeated-subqueries