I want "git log --format='(%h) %s' --abbrev=7 HEAD"
to be split into
[
"git",
"log",
"--format='(%h) %s'",
"--abbrev=7",
"HEAD"
]
As often in life, you have choices.
Use an expression that matches and captures the different parts. This can be combined with a replacement function, as in:
import re
string = "git log --format='(%h) %s' --abbrev=7 HEAD"

# Either a complete single-quoted run (matched, but not captured) or a run of
# whitespace (captured in group 1 so the callback can tell the cases apart).
rx = re.compile(r"'[^']*'|(\s+)")

# Marker substituted for every whitespace run outside quotes. It is defined
# once so the substitution and the split below cannot drift apart.
# Caveat: input that already contains this marker would be split there too.
SEPARATOR = "#@#"


def replacer(match):
    """Return SEPARATOR for unquoted whitespace, else the match unchanged."""
    if match.group(1):
        return SEPARATOR
    # A quoted run matched: keep it verbatim, including its inner spaces.
    return match.group(0)


string = rx.sub(replacer, string)
# The marker is a plain literal, so str.split is enough; no regex needed.
parts = string.split(SEPARATOR)
You could use the better regex module (a third-party package) with (*SKIP)(*FAIL):
import regex as re
# Sample command line; the single-quoted --format value contains a space.
string = "git log --format='(%h) %s' --abbrev=7 HEAD"
# First alternative: match a whole single-quoted run, then discard it with
# (*SKIP)(*FAIL) so nothing inside the quotes can serve as a split point.
# Second alternative: a whitespace run, which is the actual separator.
# These verbs require the `regex` package (imported as `re` above); the
# standard-library `re` module does not support them.
rx = re.compile(r"'[^']*'(*SKIP)(*FAIL)|\s+")
# Split on the surviving whitespace matches only.
parts = rx.split(string)
Write yourself a little parser:
def little_parser(string):
    """Yield the whitespace-separated parts of *string*, honouring quotes.

    Whitespace inside a single-quoted section does not split; the quote
    characters themselves are kept in the yielded part. Runs of whitespace
    (spaces, tabs, newlines) outside quotes count as one separator, and
    leading/trailing whitespace produces no empty parts. This matches the
    ``\\s+`` separator used by the regex-based variants.

    An unterminated quote simply extends to the end of the input.
    """
    in_quote = False
    token = []  # characters of the part being built; joined once per part
    for char in string:
        if char == "'":
            in_quote = not in_quote
            token.append(char)
        elif char.isspace() and not in_quote:
            # Only emit when something was collected, so consecutive
            # separators do not produce empty parts.
            if token:
                yield "".join(token)
                token = []
        else:
            token.append(char)
    if token:
        yield "".join(token)
# Print each part on its own line; `your_string` stands for the command
# line you want to split.
for part in little_parser(your_string):
    print(part)
Each of the three approaches produces these parts:
['git', 'log', "--format='(%h) %s'", '--abbrev=7', 'HEAD']