I am currently writing a script to run multiple SQL files using Python, a little background before you mention alternative methods; this is to automate the scripts and Python is
The more correct approach is to parse comments and quoted strings, and only treat `;` characters outside of them as statement separators. Otherwise your code will break as soon as you comment out several SQL statements with a block comment.
Here is a state machine based implementation I made for myself - this code is probably ugly and could be written much better, so please feel free to improve it by editing my answer.
It doesn't handle MySQL-style comments that start with `#`, but that is easy to add.
def split_sql_expressions(text):
    """Split SQL source text into individual statements.

    The text is scanned one character at a time by a small state machine so
    that a ``;`` only counts as a statement separator when it appears outside
    of quoted strings and comments.

    Handled constructs:

    * single- and double-quoted strings; a backslash inside a quoted string
      makes the following character literal (so an escaped quote does not
      terminate the string),
    * ``--`` comments, running to the end of the line,
    * ``/* ... */`` block comments, including ones closed by a run of
      asterisks such as ``**/``.

    Comment text is dropped from the yielded statements (the newline that
    ends a ``--`` comment is kept).  MySQL-style ``#`` comments are not
    recognised.

    Args:
        text: the SQL source to split.

    Yields:
        Each non-empty statement, stripped of surrounding whitespace and of
        its terminating semicolon.

    Raises:
        Exception: if the state machine reaches an unknown state (defensive
            guard; not expected for any input).
    """
    # Characters of the statement being built; a list avoids the quadratic
    # cost of repeated string concatenation.
    current = []
    # None           -> plain SQL
    # '-' / '/'      -> saw a possible comment opener, waiting for next char
    # '--'           -> inside a line comment
    # '/*' / '/**'   -> inside a block comment / just saw '*' inside it
    # quote          -> inside a quoted string (quote + '\\' after a backslash)
    state = None

    for c in text:
        # Resolve a pending "maybe a comment" opener first.  When it turns
        # out not to be a comment we fall through with state None so that
        # *this* character is still examined normally (it may open a quoted
        # string, start another comment, or be a ';').
        if state == '-':
            if c == '-':
                current.pop()  # drop the first minus, already buffered
                state = '--'
                continue
            state = None
        elif state == '/':
            if c == '*':
                current.pop()  # drop the slash, already buffered
                state = '/*'
                continue
            state = None

        if state is None:
            # default state, outside of any special entity
            current.append(c)
            if c in '"\'':
                state = c
            elif c == '-':
                state = '-'
            elif c == '/':
                state = '/'
            elif c == ';':
                current.pop()  # the separator is not part of the statement
                statement = ''.join(current).strip()
                if statement:
                    yield statement
                current = []
        elif state == '--':
            if c == '\n':
                # end of the line comment; the newline itself is kept
                current.append(c)
                state = None
        elif state == '/*':
            if c == '*':
                state = '/**'
        elif state == '/**':
            if c == '/':
                state = None
            elif c != '*':
                # A further '*' keeps us in this state so that '**/' still
                # closes the comment; anything else means no terminator yet.
                state = '/*'
        elif state[0] in '"\'':
            current.append(c)
            if state.endswith('\\'):
                # previous char was a backslash: c is literal
                state = state[0]
            elif c == '\\':
                # do not interpret the next character
                state += '\\'
            elif c == state[0]:
                # closing quote
                state = None
        else:
            raise Exception('Illegal state %s' % state)

    # Whatever follows the last ';' (or the whole text if it had none).
    statement = ''.join(current).rstrip(';').strip()
    if statement:
        yield statement
And use it like this:
# Read the whole script, then run each statement on the existing cursor.
with open('myfile.sql', 'r') as sqlfile:
    sql_text = sqlfile.read()
    for statement in split_sql_expressions(sql_text):
        cursor.execute(statement)