问题
My Python service uses an SQLite database encrypted with the SQLCipher library. There is a performance problem when processing the very first request. The database is very small and simple, and the requests are trivial, but processing the first request after connecting takes a ridiculously long time: 0.4 sec.
This is a working test script. It is also quite simple (most of it is setup and debug messages). It creates small databases (with and without encryption), populates them with some data, and measures the time taken by SQL SELECT statements.
#!/usr/bin/env python
import time
import os.path
from pysqlcipher3 import dbapi2 as sqlcipher
class Timer:
    """Helper class for measuring the time elapsed between successive calls.

    Args:
        clock: Zero-argument callable returning the current time in seconds.
            Defaults to ``time.perf_counter``; pass a fake clock to get
            deterministic readings (e.g. in tests).

    Attributes:
        start: Clock reading taken by the most recent ``measure()`` call, or
            ``None`` if ``measure()`` has not been called yet.
    """

    def __init__(self, *, clock=time.perf_counter):
        self._clock = clock
        self.start = None

    def measure(self):
        """Return the seconds elapsed since the previous call and restart.

        Returns:
            ``None`` on the first call (there is no previous reading to
            compare against), otherwise the difference between the current
            clock reading and the one stored by the previous call.
        """
        finish = self._clock()
        elapsed = None if self.start is None else finish - self.start
        # The current reading becomes the reference point for the next call.
        self.start = finish
        return elapsed
def make_databse(db_name, key):
    """Create a small test database with two tables: version and account.

    If a file named ``db_name`` already exists, a message is printed and
    nothing else is done. Otherwise the database is created, keyed with
    ``key`` when one is given, and populated with one ``version`` row and
    100 ``account`` rows.

    Args:
        db_name: Path of the database file to create.
        key: Passphrase passed to ``pragma key``. A falsy value (e.g. ``""``)
            skips the pragma, leaving the database in plaintext.
    """
    if os.path.exists(db_name):
        print("Databse {} already exists".format(db_name))
        return

    db = sqlcipher.connect(db_name)
    try:
        # As in the standard sqlite3 module, ``with db`` only scopes a
        # transaction (commit/rollback); it does not close the connection,
        # hence the explicit close() in the ``finally`` below.
        with db:
            if key:
                # NOTE: the key is interpolated verbatim into the statement,
                # so a key containing a double quote would break it.
                db.executescript('pragma key="{}";'.format(key))
            db.execute("CREATE TABLE version(key INTEGER PRIMARY KEY ASC, ver);")
            db.execute("INSERT INTO version(ver) VALUES ('aaa');")
            db.execute("CREATE TABLE account(key INTEGER PRIMARY KEY ASC, name);")
            # Bind the names as parameters instead of formatting them into
            # the SQL text; one statement is reused for all rows.
            db.executemany(
                "INSERT INTO account(name) VALUES (?);",
                (("name {}".format(n_id),) for n_id in range(100)),
            )
    finally:
        db.close()

    print("Test database created: {}, {}".format(
        db_name,
        "Key len={}".format(len(key)) if key else "Not encripted"))
def test_connect_and_reads(run_id, db_name, key, *tables_names):
    """Connect to a database, read the given tables and print the timings.

    The elapsed time is printed after each step: connecting, supplying the
    key (or skipping it), and each ``SELECT *`` over a table. According to
    the SQLCipher explanation accompanying this script, the key is derived
    on the first statement that touches the file, so for an encrypted
    database the first read is expected to include that cost.

    Args:
        run_id: Label prefixed to every printed line of this run.
        db_name: Path of the database file to open.
        key: Passphrase passed to ``pragma key``; a falsy value skips it.
        *tables_names: Tables to read, in order. The names are formatted
            directly into the SQL text (identifiers cannot be bound as
            parameters), so they must come from trusted code.
    """
    print("{}: Start! Db: {}, {}".format(
        run_id, db_name,
        "Encripted, key len={}".format(len(key)) if key else "Not encripted"))

    timer = Timer()
    timer.measure()  # first call only records the starting point

    db = sqlcipher.connect(db_name)
    try:
        print("{}: Connect. Elapsed: {} sec".format(run_id, timer.measure()))

        if key:
            db.executescript('pragma key="{}";'.format(key))
            print("{}: Provide Key. Elapsed: {} sec".format(run_id, timer.measure()))
        else:
            print("{}: Skip Provide Key. Elapsed: {} sec".format(run_id, timer.measure()))

        for table_name in tables_names:
            curs = db.execute("SELECT * FROM {};".format(table_name))
            # Fetch every row so the measured time covers the full read.
            recs = curs.fetchall()
            print("{}: Read {} records from table '{}'. Elapsed: {} sec".format(
                run_id, len(recs), table_name, timer.measure()))
    finally:
        # Release the connection even if keying or a query fails.
        db.close()

    print("{}: done.".format(run_id))
    print()
def main():
    """Prepare both test databases, then time three read orders on each.

    Runs 0-2 use the encrypted database and runs 3-5 the plaintext one; each
    group reads the tables in the same three orders.
    """
    secret = "DUMMYKEYDUMMYKEY"
    encrypted_db = "rabbits_enc.sqlite3"
    plain_db = "rabbits.sqlite3"

    make_databse(encrypted_db, secret)  # prepare encrypted database
    make_databse(plain_db, "")  # prepare plaintext database

    read_orders = (
        ('version', 'account'),
        ('account', 'version'),
        ('account', 'account'),
    )

    run_id = 0
    # Encrypted database first, then the plaintext one.
    for db_name, db_key in ((encrypted_db, secret), (plain_db, "")):
        for tables in read_orders:
            test_connect_and_reads(run_id, db_name, db_key, *tables)
            run_id += 1
# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
From the output of this script I see that if database is encrypted then processing the very first SELECT takes 0.4 sec. There is no such problem if database is plaintext.
Databse rabbits_enc.sqlite3 already exists
Databse rabbits.sqlite3 already exists
0: Start! Db: rabbits_enc.sqlite3, Encripted, key len=16
0: Connect. Elapsed: 0.00016079703345894814 sec
0: Provide Key. Elapsed: 0.00215048500103876 sec
0: Read 1 records from table 'version'. Elapsed: 0.4296091449796222 sec
0: Read 100 records from table 'account'. Elapsed: 0.0009567929664626718 sec
0: done.
1: Start! Db: rabbits_enc.sqlite3, Encripted, key len=16
1: Connect. Elapsed: 7.332500535994768e-05 sec
1: Provide Key. Elapsed: 0.00037083501229062676 sec
1: Read 100 records from table 'account'. Elapsed: 0.4182819949928671 sec
1: Read 1 records from table 'version'. Elapsed: 0.0005165199982002378 sec
1: done.
2: Start! Db: rabbits_enc.sqlite3, Encripted, key len=16
2: Connect. Elapsed: 9.809300536289811e-05 sec
2: Provide Key. Elapsed: 0.0019192049512639642 sec
2: Read 100 records from table 'account'. Elapsed: 0.4121257350197993 sec
2: Read 100 records from table 'account'. Elapsed: 0.0008492250344716012 sec
2: done.
3: Start! Db: rabbits.sqlite3, Not encripted
3: Connect. Elapsed: 7.215503137558699e-05 sec
3: Skip Provide Key. Elapsed: 0.0002521659480407834 sec
3: Read 1 records from table 'version'. Elapsed: 0.0035479930229485035 sec
3: Read 100 records from table 'account'. Elapsed: 0.000983492995146662 sec
3: done.
4: Start! Db: rabbits.sqlite3, Not encripted
4: Connect. Elapsed: 7.175595965236425e-05 sec
4: Skip Provide Key. Elapsed: 0.004018213017843664 sec
4: Read 100 records from table 'account'. Elapsed: 0.0010135580087080598 sec
4: Read 1 records from table 'version'. Elapsed: 0.0014616100233979523 sec
4: done.
5: Start! Db: rabbits.sqlite3, Not encripted
5: Connect. Elapsed: 7.912697037681937e-05 sec
5: Skip Provide Key. Elapsed: 0.0003501430037431419 sec
5: Read 100 records from table 'account'. Elapsed: 0.0007411669939756393 sec
5: Read 100 records from table 'account'. Elapsed: 0.000722763012163341 sec
5: done.
I understand that db encryption brings some overhead, but it is very large in this case. I believe there is some problem in my configuration which can be fixed. Any ideas?
回答1:
By default, SQLCipher uses PBKDF2, currently set to 64,000 iterations, to compute the encryption key; this process is slow by design. The key is generally derived not when it is supplied, but on the first SQL command executed after keying the database that touches the file. That is why the ~0.4 sec cost in the output above appears on the first SELECT rather than on the `pragma key` step. We provide general performance guidance on SQLCipher here.
来源:https://stackoverflow.com/questions/53005753/python-sqlite-sqlcipher-very-poor-performance-processing-first-request