I'm processing some JSON-formatted log files in Python. It's very straightforward to code some conditional queries, e.g.
line=[1,\'runtime\',{\'elapsed\':
There is not really a safe way to do that. For basic conditionals you could parse an input string of a specific format. If your input was in the format "var > 5" you could parse it like this:
# Illustrative sketch: evaluate one "<name> <operator> <number>" conditional
# given as the first command-line argument.
# Split the argument on whitespace into its three parts.
var, op, num = argv[1].split()
var = getattr(sys.modules[__name__], var) # Get a reference to the data
# The right-hand side is converted with int(), so only integers are handled.
num = int(num)
# Pick the comparison from the operator token; r holds its result.
if op == ">":
r = var > num
elif op == "<":
r = var < num
...
if r:
<do stuff>
To support more complicated statements you would need to improve the parser. If you don't trust your input, you should wrap the getattr and int calls in try/except blocks. Supporting an int, a float, or another variable as the right-hand operand would require quite a bit more logic.
This should do what you want:
from __future__ import print_function
import ast
import operator
import sys
# Dispatch table from the operator token of a conditional string to a
# two-argument callable, invoked as ``func(first_operand, second_operand)``.
OPERATORS = {
    '<': operator.lt,
    '<=': operator.le,
    '>': operator.gt,
    '>=': operator.ge,
    '==': operator.eq,
    '!=': operator.ne,
    # operator.contains expects (container, item); 'in' receives its operands
    # the other way round, so it swaps them before delegating.
    'in': lambda needle, haystack: operator.contains(haystack, needle),
    'contains': operator.contains,
}
def _resolve_operand(raw_operand, variables):
    """Turn one operand token into a Python value.

    The token is first evaluated as a Python literal; if that fails it is
    looked up by name in ``variables``.

    Raises:
        ValueError: if the token is neither a valid literal nor a key of
            ``variables``.
    """
    try:
        return ast.literal_eval(raw_operand)
    except (ValueError, SyntaxError):
        # literal_eval reports a bare name with ValueError and text that
        # does not parse at all with SyntaxError; both mean "not a literal".
        pass
    try:
        return variables[raw_operand]
    except KeyError:
        # Raise a fresh ValueError instead of the Python 2-only three-argument
        # raise, so the module also compiles on Python 3.
        raise ValueError(
            "operand %r is neither a known variable nor a valid literal"
            % (raw_operand,))


def process_conditionals(conditional_strings, variables):
    """Lazily parse conditional strings into ``(op, operands)`` pairs.

    Each string must look like ``"<first> <op> <second>"``. Operands are
    resolved as Python literals first and as names in ``variables`` second.

    Args:
        conditional_strings: iterable of conditional strings.
        variables: mapping of variable name to current value.

    Yields:
        ``(op, (first_value, second_value))`` for each conditional string;
        ``op`` is the raw operator token and is not validated here.

    Raises:
        ValueError: while iterating, if a string has fewer than three
            whitespace-separated parts or an operand cannot be resolved.
    """
    for conditional_string in conditional_strings:
        # maxsplit=2: everything after the operator belongs to the second
        # operand, so literals such as "['a', 'b']" may contain spaces.
        first, op, second = conditional_string.split(None, 2)
        operands = tuple(
            _resolve_operand(raw_operand, variables)
            for raw_operand in (first, second)
        )
        yield (op, operands)
def main(lines, *conditional_strings):
    """Print every record that satisfies all of the given conditionals.

    Args:
        lines: iterable of ``[key, category, details]`` records, where
            ``details`` is a mapping with ``'elapsed'`` and ``'jobname'``
            entries.
        *conditional_strings: conditionals of the form
            ``"<operand> <operator> <operand>"``; a record is printed only
            if all of them hold (with none given, every record is printed).

    Problems with a conditional (wrong operand type, unresolvable operand,
    unknown operator) are reported on stderr and stop further processing.
    """
    for line in lines:
        key, category, details = line
        # Names that may be used as operands in a conditional string.
        variables = {
            'key': key,
            'category': category,
            'elapsed': details['elapsed'],
            'jobname': details['jobname'],
        }
        # Lazy generator: parsing errors surface inside the all() call below.
        conditionals = process_conditionals(conditional_strings, variables)
        try:
            # You could check each conditional separately to determine
            # which ones have errors.
            condition = all(OPERATORS[op](*operands)
                            for op, operands in conditionals)
        except KeyError as exc:
            # OPERATORS[op] failed: report it like the other input errors
            # instead of letting a traceback escape.
            print("Unknown operator %r in one of your conditionals. "
                  "Supported operators: %s."
                  % (exc.args[0], ', '.join(sorted(OPERATORS))),
                  file=sys.stderr)
            break
        except TypeError:
            print("A literal in one of your conditionals is the wrong type. "
                  "If you can't see it, try running each one separately.",
                  file=sys.stderr)
            break
        except ValueError:
            print("An operand in one of your conditionals is neither a known "
                  "variable nor a valid literal. If you can't see it, try "
                  "running each one separately.", file=sys.stderr)
            break
        else:
            if condition:
                print(line)
if __name__ == '__main__':
    # Every command-line argument is one conditional string.
    conditional_strings = sys.argv[1:]

    # Sample (elapsed, jobname) data; keys are assigned sequentially from 1.
    samples = (
        (12.3, 'high38853'),
        (45.6, 'high38854'),
        (78.9, 'high38855'),
        (14.7, 'high38856'),
        (25.8, 'high38857'),
        (36.9, 'high38858'),
        (75.3, 'high38859'),
    )
    # Build the [key, category, details] records that main() expects.
    lines = [
        [key, 'runtime', {'elapsed': elapsed, 'jobname': jobname}]
        for key, (elapsed, jobname) in enumerate(samples, 1)
    ]
    main(lines, *conditional_strings)
Examples:
$ ./SO_31999444.py 'elapsed > 30'
[2, 'runtime', {'jobname': 'high38854', 'elapsed': 45.6}]
[3, 'runtime', {'jobname': 'high38855', 'elapsed': 78.9}]
[6, 'runtime', {'jobname': 'high38858', 'elapsed': 36.9}]
[7, 'runtime', {'jobname': 'high38859', 'elapsed': 75.3}]
$ ./SO_31999444.py 'elapsed > 20' 'elapsed < 50'
[2, 'runtime', {'jobname': 'high38854', 'elapsed': 45.6}]
[5, 'runtime', {'jobname': 'high38857', 'elapsed': 25.8}]
[6, 'runtime', {'jobname': 'high38858', 'elapsed': 36.9}]
$ ./SO_31999444.py 'elapsed > 20' 'elapsed < 50' 'key >= 5'
[5, 'runtime', {'jobname': 'high38857', 'elapsed': 25.8}]
[6, 'runtime', {'jobname': 'high38858', 'elapsed': 36.9}]
$ ./SO_31999444.py "'9' in jobname"
[7, 'runtime', {'jobname': 'high38859', 'elapsed': 75.3}]
$ ./SO_31999444.py "jobname contains '9'"
[7, 'runtime', {'jobname': 'high38859', 'elapsed': 75.3}]
$ ./SO_31999444.py "jobname in ['high38857', 'high38858']"
[5, 'runtime', {'jobname': 'high38857', 'elapsed': 25.8}]
[6, 'runtime', {'jobname': 'high38858', 'elapsed': 36.9}]
$ ./SO_31999444.py "9 in jobname"
A literal in one of your conditionals is the wrong type. If you can't see it, try running each one separately.
$ ./SO_31999444.py "notakey == 'something'"
An operand in one of your conditionals is neither a known variable nor a valid literal. If you can't see it, try running each one separately.
$ ./SO_31999444.py "2 == 2"
[1, 'runtime', {'jobname': 'high38853', 'elapsed': 12.3}]
[2, 'runtime', {'jobname': 'high38854', 'elapsed': 45.6}]
[3, 'runtime', {'jobname': 'high38855', 'elapsed': 78.9}]
[4, 'runtime', {'jobname': 'high38856', 'elapsed': 14.7}]
[5, 'runtime', {'jobname': 'high38857', 'elapsed': 25.8}]
[6, 'runtime', {'jobname': 'high38858', 'elapsed': 36.9}]
[7, 'runtime', {'jobname': 'high38859', 'elapsed': 75.3}]
$ ./SO_31999444.py
[1, 'runtime', {'jobname': 'high38853', 'elapsed': 12.3}]
[2, 'runtime', {'jobname': 'high38854', 'elapsed': 45.6}]
[3, 'runtime', {'jobname': 'high38855', 'elapsed': 78.9}]
[4, 'runtime', {'jobname': 'high38856', 'elapsed': 14.7}]
[5, 'runtime', {'jobname': 'high38857', 'elapsed': 25.8}]
[6, 'runtime', {'jobname': 'high38858', 'elapsed': 36.9}]
[7, 'runtime', {'jobname': 'high38859', 'elapsed': 75.3}]
This was a fun little project :).