问题
I was looking into performance of selecting closest points within certain proximity to given coordinate.
Options are to ether use two decimal(8,6)
- lat, long columns or single geography
column and work with that.
I am only interested which is faster?
回答1:
TL;DR Geography is ~10 times faster.
Ok so I have set up test:
Couple of tables one with id,lat,long (int, decimal(8,6),decimal(8,6))
other with id,coord (int, geography)
.
Then insert 47k of random data.
For indexing first table I used nonclustered Ascending index on lat,long with fill factor of 95.
for second one GRIDS =(LEVEL_1 = LOW,LEVEL_2 = MEDIUM,LEVEL_3 = LOW,LEVEL_4 = LOW
with fill factor 95.
CREATE TABLE dbo.Temp
(
Id int NOT NULL IDENTITY (1, 1),
Coord geography NOT NULL
) ON [PRIMARY]
TEXTIMAGE_ON [PRIMARY]
GO
ALTER TABLE dbo.Temp ADD CONSTRAINT
PK_Temp PRIMARY KEY CLUSTERED
(
Id
) WITH( STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
declare @i int =0
declare @lat decimal(8,6) =0.0
declare @long decimal(8,6) =0.0
while (@i < 47000)
begin
set @lat =(select (0.9 -Rand()*1.8)*100)
set @long =(select (0.9 -Rand()*1.8)*100)
insert into Temp
select geography::Point(@lat, @long,4326)
set @i =@i+1
end
go
CREATE SPATIAL INDEX [SpatialIndex_1] ON [dbo].Temp
(
[coord]
)USING GEOGRAPHY_GRID
WITH (GRIDS =(LEVEL_1 = LOW,LEVEL_2 = MEDIUM,LEVEL_3 = LOW,LEVEL_4 = LOW),
CELLS_PER_OBJECT = 16, PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = OFF, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 95) ON [PRIMARY]
GO
CREATE TABLE [dbo].[Temp2](
[Id] [int] IDENTITY(1,1) NOT NULL,
[Lat] [decimal](8, 6) NOT NULL,
[Long] [decimal](8, 6) NOT NULL,
CONSTRAINT [PK_Temp2] PRIMARY KEY CLUSTERED
(
[Id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
declare @i int = 0
declare @lat decimal(8,6) = 0
declare @long decimal(8,6) = 0
while (@i < 47000)
begin
set @lat = (select (0.9 - (RAND()*1.8))*100)
set @long = (select (0.9 - (RAND()*1.8))*100)
insert into Temp2
select @lat , @long
set @i = @i +1
end
go
CREATE NONCLUSTERED INDEX [Coord_IX] ON [dbo].[Temp2]
(
[Lat] ASC,
[Long] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 95) ON [PRIMARY]
GO
Then I ran couple of tests:
First is for Lat,Long.
declare @lat decimal(8,6) = 0.0,
@lon decimal(8,6) = 0.0,
@i int = 0,
@start datetime = getdate()
while(@i < 100)
begin
set @lat = (select (0.9 - Rand()*1.8)*100)
set @lon = (select (0.9 - (RAND()*1.8))*100.0)
DECLARE @lat_s FLOAT = SIN(@lat * PI() / 180),
@lat_c FLOAT = COS(@lat * PI() / 180)
SELECT DISTINCT top 1000 @lat, @lon, *
FROM (
SELECT
lat,
long,
((ACOS(@lat_s * SIN(lat * PI() / 180) + @lat_c * COS(lat * PI() / 180) * COS((@lon - long) * PI() / 180)) * 180 / PI()) * 60 * 1.1515) AS dist
FROM dbo.Temp2
) t
ORDER BY dist
set @i= @i+1
end
print CONVERT(varchar,(getdate()-@start),108)
go
Second is for geography.
DECLARE @g geography;
declare @point nvarchar(50) =''
declare @i int =0,
@lat decimal(8,6) =0.0,
@long decimal(8,6) =0.0,
@start datetime = getdate()
while (@i < 100)
begin
set @lat =(select (0.9 -Rand()*1.8)*100)
set @long =(select (0.9 -Rand()*1.8)*100)
set @point = (select 'POINT('+CONVERT(varchar(10), @lat)+ ' ' +CONVERT(varchar(10), @long)+')')
SET @g = geography::STGeomFromText(@point, 4326);
SELECT TOP 1000
@lat,
@long,
@g.STDistance(st.[coord]) AS [DistanceFromPoint (in meters)]
, st.[coord]
, st.id
FROM Temp st
ORDER BY @g.STDistance(st.[coord]) ASC
set @i =@i+1
end
print CONVERT(varchar,(getdate()-@start),108)
go
Results:
- Lat,Long - 00:00:10
- Geography - 00:02:21
For those who are wondering why geography has such poor performance here's execution plan - notice that it does not use spatial index, and takes ages to sort since row size is 4047 Bytes (against 25 Bytes in decimal). Trying to force index results in runtime error
P.S I also did one for flat surface but difference from spherical one is very small ~0.5s (comes back in 9.5-10.0 seconds which does seem to be slightly faster) still to have it all in one place here's script:
print 'flat'
declare @lat decimal(8,6) = 0.0,
@lon decimal(8,6) = 0.0,
@i int = 0,
@start datetime = getdate()
while(@i < 100)
begin
set @lat = (select (0.9 - Rand()*1.8)*100)
set @lon = (select (0.9 - (RAND()*1.8))*100.0)
SELECT DISTINCT top 1000 @lat, @lon, *
FROM (
SELECT
lat,
long,
sqrt(power((@lat - lat),2) + (power((@lon - long),2))) AS dist
FROM dbo.Temp2
) t
ORDER BY dist
set @i= @i+1
end
print CONVERT(varchar,(getdate()-@start),108)
go
UPDATE:
After switching to SQL 2014 and forcing to use the index with 10M records:
- Lat,Long took 00:00:22.935
- Flat took 00:00:22.988
- Geography took 00:00:02.427
geography script used:
DECLARE @g geography;
declare @point nvarchar(50) =''
declare @i int =0,
@lat decimal(8,6) =0.0,
@long decimal(8,6) =0.0,
@start datetime = getdate()
set @lat =(select (0.9 -Rand()*1.8)*100)
set @long =(select (0.9 -Rand()*1.8)*100)
set @point = (select 'POINT('+CONVERT(varchar(10), @lat)+ ' '
+CONVERT(varchar(10), @long)+')')
SET @g = geography::STGeomFromText(@point, 4326);
SELECT TOP 1000
@lat,
@long,
@g.STDistance(st.[coord]) AS [DistanceFromPoint (in meters)]
, st.[coord]
, st.id
FROM Temp st with(index([SpatialIndex_1]))
WHERE @g.STDistance(st.[coord]) IS NOT NULL
ORDER BY @g.STDistance(st.[coord]) asc
来源:https://stackoverflow.com/questions/34763910/sql-geometry-vs-decimal8-6-lat-long-performance