问题
Team,
I am trying to count two patterns in a file and list them as
pattern1: 2
pattern2: 3
#!/usr/bin/python
import os
import re
# Counters keyed by whatever is inserted below; nothing is ever inserted
# into this dict before it is read.
d = dict()
with open('/home/user/waste/nodes-prod.log', 'r') as file:
for line in file:
line = line.strip()
for word in line.split():
# re.match returns a Match object when the word starts with the pattern
# and None otherwise -- it does not return the matched string.
node1 = re.match(r"team1.*", word)
# The result of type() is discarded, so this statement has no effect.
type(node1)
node2 = re.match(r"team2.*", word)
# Same as above: no effect.
type(node2)
# d is still empty the first time this runs, so the test is False ...
if node1 in d:
d[node1] = d[node1] + 1
else:
# ... and this reads d[node2] before it was ever set, which is the
# KeyError shown in the traceback below.
d[node2] = d[node2] + 1
for key in list(d.keys()):
print(key, ":", d[key])
my /home/user/waste/nodes-prod.log
is below
cat /home/user/waste/nodes-prod.log
team1-develop
team1-work
team2-research1
team2-research2
team2-research3
output
Traceback (most recent call last):
File "read-and-count-words-pattern-fromfile-using-dict-in-python.py", line 17, in <module>
d[node2] = d[node2] + 1
KeyError: <_sre.SRE_Match object; span=(0, 10), match='team2-research1'>
expected:
node1: 2
node2: 3
回答1:
It is easier if you read the entire text into memory (if that is not burdensome given the size of the file):
import re
# Read the whole file into memory in one go. `fn` is not defined in this
# snippet; it is expected to hold the path of the log file to scan.
with open(fn) as f:
    txt = f.read()
# "." does not match a newline, so every match stays within a single line
# and findall yields one hit per line that contains the pattern.
print(f'node 1: {len(re.findall(r"team1.*", txt))}')
print(f'node 2: {len(re.findall(r"team2.*", txt))}')
Prints:
node 1: 2
node 2: 3
If you do want to do line-by-line, you can just keep a counter:
import re
# One running counter per pattern.
node1, node2 = 0, 0
# `fn` is not defined in this snippet; it is expected to hold the path of
# the log file to scan.
with open(fn) as f:
    for line in f:
        # re.search finds the pattern anywhere in the line; each line is
        # counted at most once per pattern.
        if re.search(r"team1.*", line):
            node1 += 1
        if re.search(r"team2.*", line):
            node2 += 1
print(f'node 1: {node1}')
print(f'node 2: {node2}')
Better still, you could use a dict
to map the number following `team` (matched by `team(\d+)`) to the count of lines that contain it:
# Maps the digits captured after "team" (as a string) to the number of
# lines in which they were found.
nodes = {}
# `fn` is not defined in this snippet; it is expected to hold the path of
# the log file to scan.
with open(fn) as f:
    for line in f:
        # The assignment expression (:=) requires Python 3.8 or newer.
        if m := re.search(r"team(\d+).*", line):
            # get(..., 0) supplies the starting count for a number seen
            # for the first time.
            nodes[m.group(1)] = nodes.get(m.group(1), 0) + 1
>>> nodes
{'1': 2, '2': 3}
回答2:
#!/usr/bin/python
import os
import re
def increment(dict: dict, pattern: str, word: str):
    """Count ``word`` in ``dict`` if it matches ``pattern``.

    The counter is keyed by the matched text, not by the pattern, so two
    different words that match the same pattern are counted separately.

    Args:
        dict: Mapping from matched text to its count; updated in place.
            The name shadows the builtin inside this function and is kept
            only so that existing keyword callers continue to work.
        pattern: Regular expression, applied with ``re.match`` and
            therefore anchored at the start of ``word``.
        word: The text to test against ``pattern``.

    Returns:
        None. ``dict`` is left untouched when ``word`` does not match.
    """
    match = re.match(pattern, word)
    if match is None:
        return
    # re.match returns a Match object, not a string; .group() with no
    # argument returns the whole matched text.
    node = match.group()
    # get(..., 0) supplies the starting count for text seen the first time.
    dict[node] = dict.get(node, 0) + 1
def scores(filename: str, patterns: dict) -> dict:
    """Count pattern matches among the whitespace-separated words of a file.

    Args:
        filename: Path of the text file to parse.
        patterns: Regular-expression strings to test every word against.
            The parameter is only iterated, so any iterable of patterns
            works; the script in this file passes a list even though the
            annotation says ``dict``.

    Returns:
        A dictionary of counters as filled in by ``increment``.
    """
    # Counters, filled in by increment().
    d = {}
    with open(filename, 'r') as file:
        for line in file:
            # split() with no arguments already discards leading and
            # trailing whitespace, so no separate strip() is needed.
            for word in line.split():
                # Check the word against all patterns:
                for pattern in patterns:
                    increment(d, pattern, word)
    return d
# Patterns to search for.
# The re module caches recently compiled patterns, so they are not
# pre-compiled here.
patterns = [r"team1.*", r"team2.*"]
# File to parse:
filename = '/home/user/waste/nodes-prod.log'
# items() yields (key, value) pairs, for when both are needed:
for key, value in scores(filename, patterns).items():
    print(key, ":", value)
`def increment(dict: dict, pattern: str, word: str):` defines a function that receives a dictionary `dict`, a `pattern`, and the `word` to check against `pattern`. `re.match` returns a Match object, `match`, when the word matches. The parameters are typed, which is optional in Python.
`def scores(filename: str, patterns: dict) -> dict:` defines a function that receives `filename` as a string and the `patterns` to check (annotated as a dictionary, although the example passes a list), and returns another dictionary of match counts.
来源:https://stackoverflow.com/questions/64219282/count-occurrences-of-a-string-pattern-in-a-file-and-count