Say that the user inputs:
"daslakndlaaaaajnjndibniaaafijdnfijdnsijfnsdinifaaaaaaaaaaafnnasm"
How would you go about finding the highest number of consecutive occurrences of "a", and then replacing such a run with just two "a"s?
Try this:
import collections
def runLenEncode(s):
    """Run-length encode the sequence *s*.

    Args:
        s: A string (or any indexable sequence with a length).

    Returns:
        A list of ``(item, count)`` tuples, one per maximal run of equal
        consecutive items, in the order the runs appear. Empty input
        yields an empty list.
    """
    answer = []
    start = 0  # index where the current run began
    for i in range(1, len(s) + 1):
        # A run ends either at the end of the input or where the item changes.
        if i == len(s) or s[i] != s[start]:
            answer.append((s[start], i - start))
            start = i
    return answer
def runLenFilter(encoding, thresholds, repLens):
    """Rebuild a string from a run-length encoding, shortening long runs.

    Args:
        encoding: Iterable of ``(char, count)`` pairs.
        thresholds: Maps a character to the minimum run length at which
            its runs get replaced.
        repLens: Maps a character to the run length used as the
            replacement.

    Returns:
        The decoded string, where every run whose character is in
        *thresholds* and whose length reaches that threshold is emitted
        with length ``repLens[char]`` instead. All other runs are emitted
        unchanged.

    Raises:
        KeyError: If a run qualifies for replacement but its character
            has no entry in *repLens*.
    """
    answer = []
    for char, count in encoding:
        if char in thresholds and count >= thresholds[char]:
            count = repLens[char]
        answer.append(char * count)
    return ''.join(answer)
def maxFilter(encoding, repLens):
    """Rebuild a string from a run-length encoding, shortening longest runs.

    For each character that has an entry in *repLens*, the longest run
    length of that character is found; every run of exactly that length
    (ties included) is then emitted with length ``repLens[char]``. All
    other runs are emitted unchanged.

    Args:
        encoding: Sequence of ``(char, count)`` pairs. It is traversed
            twice, so it must not be a one-shot iterator.
        repLens: Maps a character to the replacement run length.

    Returns:
        The decoded string.
    """
    # First pass: longest run seen for each character we care about.
    maxCounts = {}
    for char, count in encoding:
        if char in repLens:
            maxCounts[char] = max(maxCounts.get(char, 0), count)

    # Second pass: decode, substituting the replacement length for the
    # longest run(s) of each tracked character.
    answer = []
    for char, count in encoding:
        if char in repLens and count == maxCounts[char]:
            count = repLens[char]
        answer.append(char * count)
    return ''.join(answer)
if __name__ == "__main__":
    # Demo: encode the sample string once, then decode it with each filter.
    print('starting')
    s = "daslakndlaaaaajnjndibniaaafijdnfijdnsijfnsdinifaaaaaaaaaaafnnasm"
    encoding = runLenEncode(s)
    print("encoding:", encoding)

    # Every run of at least 3 'a's is cut down to 2.
    thresholds = {'a': 3}
    repLens = {'a': 2}
    decoded = runLenFilter(encoding, thresholds, repLens)
    print('lenFilter:', decoded)

    # Only the longest run(s) of 'a' are cut down to 2.
    filtered = maxFilter(encoding, repLens)
    print("maxFilter:", filtered)
    print('done')
And the output:
$ python3 myFile.py
starting
encoding: [('d', 1), ('a', 1), ('s', 1), ('l', 1), ('a', 1), ('k', 1), ('n', 1), ('d', 1), ('l', 1), ('a', 5), ('j', 1), ('n', 1), ('j', 1), ('n', 1), ('d', 1), ('i', 1), ('b', 1), ('n', 1), ('i', 1), ('a', 3), ('f', 1), ('i', 1), ('j', 1), ('d', 1), ('n', 1), ('f', 1), ('i', 1), ('j', 1), ('d', 1), ('n', 1), ('s', 1), ('i', 1), ('j', 1), ('f', 1), ('n', 1), ('s', 1), ('d', 1), ('i', 1), ('n', 1), ('i', 1), ('f', 1), ('a', 11), ('f', 1), ('n', 2), ('a', 1), ('s', 1), ('m', 1)]
lenFilter: daslakndlaajnjndibniaafijdnfijdnsijfnsdinifaafnnasm
maxFilter: daslakndlaaaaajnjndibniaaafijdnfijdnsijfnsdinifaafnnasm
done
Starting with the input string:
# NOTE: this name shadows Python's built-in input(); it is kept because the
# snippets that follow refer to the sample string by this name.
input = "daslakndlaaaaajnjndibniaaafijdnfijdnsijfnsdinifaaaaaaaaaaafnnasm"
To get the max consecutive number of occurrences, you would use:
# default=0 keeps this from raising ValueError when the string has no "a" at all.
max((len(s) for s in re.findall(r'a+', input)), default=0)
To replace only the longest unbroken sequence of "a"s with 2 "a"s, you would use:
# Longest run of "a" (the first one wins on ties). default=None covers a
# string with no "a", where max() over an empty iterator would raise ValueError.
maxMatch = max(re.finditer(r'a+', input), key=lambda m: len(m.group()), default=None)
if maxMatch is None:
    # Nothing to replace: leave the string as it is.
    output = input
else:
    # Splice: text before the run + "aa" + text after the run.
    output = input[:maxMatch.start()] + "aa" + input[maxMatch.end():]
First, I obtain an iterable of `MatchObject`s by testing the input string against the regex `a+`, then use `max` to obtain the `MatchObject` with the greatest length. Then, I splice together the portion of the original string up to the start of the match, the string "aa", and the portion of the original string after the end of the match to give you your final output.
To replace all occurrences of more than 2 "a"s with 2 "a"s, you would use:
# Collapse every run of three or more "a"s down to exactly two.
long_a_run = re.compile(r'a{3,}')
output = long_a_run.sub("aa", input)
The way I would do it.
s = "daslakndlaaaaajnjndibniaaafijdnfijdnsijfnsdinifaaaaaaaaaaafnnasm"
print(s)

# Longest run of consecutive "a"s, found in one pass over the string:
# run counts the current streak and drops back to 0 on any other character.
found_a_len = 0
run = 0
for ch in s:
    run = run + 1 if ch == "a" else 0
    found_a_len = max(found_a_len, run)
print("max length of a:", found_a_len)

# Each replace() pass shortens every run of three or more "a"s; repeat
# until no run longer than two is left.
while "aaa" in s:
    s = s.replace("aaa", "aa")
print(s)
this outputs:
daslakndlaaaaajnjndibniaaafijdnfijdnsijfnsdinifaaaaaaaaaaafnnasm
max length of a: 11
daslakndlaajnjndibniaafijdnfijdnsijfnsdinifaafnnasm
Some people might not like my style of coding, but for me, this code is very easy to reason about.
A lower level approach if you don't want to use regular expressions.
def count_and_reduce(s, a, keep=2):
    """Find the longest run of *a* in *s* and trim every run of *a*.

    Args:
        s: The string to scan.
        a: The single character whose runs are measured and trimmed.
        keep: Maximum number of consecutive *a* characters to retain
            from each run. Defaults to 2.

    Returns:
        A ``(maxnum, out)`` tuple: ``maxnum`` is the length of the longest
        run of *a* in *s* (0 if *a* never occurs), and ``out`` is *s* with
        every run of *a* cut down to at most *keep* characters.
    """
    num = 0      # length of the run of `a` ending at the current character
    maxnum = 0   # longest run seen so far
    kept = []    # collected pieces; joined once at the end
    for c in s:
        if c == a:
            num += 1
            maxnum = max(num, maxnum)
        else:
            num = 0
        # Other characters are always kept; `a` is kept only while the
        # current run has not yet exceeded the limit.
        if c != a or num <= keep:
            kept.append(c)
    return maxnum, ''.join(kept)
I've seen a couple of regex answers in the comments and the other question, so I'm gonna take a different road. Just getting the count can be done many different ways.
from itertools import groupby
inp = 'daslakndlaaaaajnjndibniaaafijdnfijdnsijfnsdinifaaaaaaaaaaafnnasm'
# groupby splits the string into alternating runs of "a" and not-"a".
# The key is True for the "a" runs, so filtering on it picks them out
# directly, with no need to work out whether they sit at even or odd positions.
counts = [
    len(list(group))
    for is_a, group in groupby(inp, lambda char: char == 'a')
    if is_a
]
# default=0 covers a string with no "a" (or an empty string), where a bare
# max() over an empty list would raise ValueError.
max_a_run = max(counts, default=0)  # 11
I'm pretty sure both of the regex answers I've seen will replace every string of 'aa+' with two 'a's. If you only want to replace the longest string of 'a's with 'aa' and leave the rest alone:
char_groups = groupby(inp)
counts = [(char, len(list(group))) for char, group in char_groups]
# Only runs of 'a' are candidates. Restricting the search to them means a
# string with no 'a' (or an empty string) is left unchanged, instead of
# having its first run replaced or raising ValueError.
a_runs = [i for i, (char, _) in enumerate(counts) if char == 'a']
max_idx = max(a_runs, key=lambda i: counts[i][1], default=None)
if max_idx is None:
    result = inp
else:
    # Rebuild the runs before and after the longest 'a' run, with 'aa' in its place.
    result = (
        ''.join(char * count for char, count in counts[:max_idx])
        + 'aa'
        + ''.join(char * count for char, count in counts[max_idx + 1:])
    )
# 'daslakndlaaaaajnjndibniaaafijdnfijdnsijfnsdinifaafnnasm'