I've got a Perl function which takes a timestamp and returns either the unchanged timestamp (if it's never seen it before) or otherwise, it appends some letters to make it unique.
Well, sorry to say, but you can't just do a direct translation from Perl to Python (including bit-for-bit Perlisms) and expect the outcome to be prettier. It won't be, it will be considerably uglier.
If you want the prettiness of Python you will need to use Python idioms instead.
Now for the question at hand:
from string import uppercase
class Uniquifier(object):
    """Make consecutive duplicate timestamps distinct by appending a letter.

    The first time a timestamp is seen it is returned unchanged.  Each
    immediately following call with an equal timestamp returns it with
    'A', then 'B', ... appended.

    Only the most recent timestamp is remembered.  The suffix table stops at
    'Z', so a 27th consecutive duplicate raises IndexError.
    """

    # Spelled out rather than imported: string.uppercase is locale-dependent
    # on Python 2 and does not exist on Python 3.
    _SUFFIXES = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

    def __init__(self):
        self.last_timestamp = None
        self.last_suffix = 0

    def uniquify(self, timestamp):
        """Return `timestamp`, suffixed if it repeats the previous call's value.

        Raises:
            IndexError: after more than 26 consecutive repeats.
        """
        if timestamp == self.last_timestamp:
            unique = '%s%s' % (timestamp,
                               self._SUFFIXES[self.last_suffix])
            self.last_suffix += 1
            return unique
        # New timestamp: restart the suffix sequence and remember the bare
        # value.  (This must be stored in last_timestamp, the attribute the
        # comparison above reads, or repeats are never detected.)
        self.last_suffix = 0
        self.last_timestamp = timestamp
        return timestamp
uniquifier = Uniquifier()
uniquifier.uniquify(a_timestamp)
Prettier? Maybe. More readable? Probably.
Edit (re comments): Yes this fails after Z, and I am altogether unhappy with this solution. So I won't fix it, but might offer something better, like using a number instead:
timestamp = '%s%s' % (timestamp,
self.last_suffix)
If it were me, I would do this:
import uuid
def uniquify(timestamp):
    """Return `timestamp` followed by '-' and a freshly generated UUID4.

    Every call appends a new random UUID, so the result differs between
    calls even for the same timestamp; no state is kept.
    """
    return '%s-%s' % (timestamp, uuid.uuid4())
And just be happy.
I just tested this up to 1000 against the original Perl implementation and diff returns the same results for both. The suffix code is tricky -- this is not a base 36 counter. Hasen J's solution, though it produces a unique timestamp, isn't quite the same: it goes from 'Z' to 'BA', when it should instead go to 'AA' to match the Perl ++ operator.
#!/usr/bin/python
class uniqify:
    """Callable that makes consecutive duplicate timestamps unique.

    The first occurrence of a timestamp is returned as a plain string.
    Immediately repeated timestamps get a letter suffix that advances
    A, B, ..., Z, AA, AB, ..., AZ, BA, ..., ZZ, AAA, ...

    Only the most recently seen timestamp is remembered.
    """

    # Private marker for "nothing seen yet".  A real value such as -1 would
    # make a first call with that same value look like a repeat.
    _UNSEEN = object()

    def __init__(self):
        self.last_timestamp = self._UNSEEN
        self.next_suffix = 'A'

    def suffix(self):
        """Return the pending suffix and advance to the one after it."""
        current = self.next_suffix
        # Trailing 'Z's roll over to 'A'; the letter before them is bumped.
        stem = current.rstrip('Z')
        rolled = len(current) - len(stem)
        if stem:
            bumped = stem[:-1] + chr(ord(stem[-1]) + 1)
            self.next_suffix = bumped + 'A' * rolled
        else:
            # Every letter was 'Z': grow by one position, all 'A'.
            self.next_suffix = 'A' * (len(current) + 1)
        return current

    def reset(self):
        """Restart the suffix sequence at 'A'."""
        self.next_suffix = 'A'

    def __call__(self, timestamp):
        """Return `timestamp` as a string, suffixed if it repeats the last one."""
        if timestamp == self.last_timestamp:
            return '%s%s' % (timestamp, self.suffix())
        self.last_timestamp = timestamp
        self.reset()
        return '%s' % timestamp
# Rebind the class name to a single shared instance so callers can simply
# write uniqify(timestamp).
uniqify = uniqify()
if __name__ == '__main__':
    # Demo: 1000 repeats each of two timestamps.  print(...) with a single
    # argument behaves the same on Python 2 and Python 3.
    for n in range(1000):
        print(uniqify(1))
    for n in range(1000):
        print(uniqify(2))
Look at this answer for a robust method to convert a number to an alphanumeric id
The code I present doesn't go from 'Z' to 'AA'; it goes to 'BA' instead. I suppose that doesn't matter, since it still produces a unique id.
from string import uppercase as up
import itertools
def to_base(q, alphabet):
    """Render the non-negative integer `q` in base len(alphabet).

    `alphabet[0]` is the zero digit, `alphabet[1]` is one, and so on.  Zero
    is rendered as a single `alphabet[0]`.

    Raises:
        ValueError: if `q` is negative, or if `q` is non-zero and `alphabet`
            has fewer than two symbols (no positional representation exists;
            a one-symbol alphabet would otherwise never terminate).
    """
    if q < 0: raise ValueError( "must supply a positive integer" )
    base = len(alphabet)
    if q and base < 2:
        raise ValueError("alphabet must contain at least two symbols")
    digits = []
    while q:
        q, r = divmod(q, base)
        # Digits come out least-significant first; reversed once below.
        digits.append(alphabet[r])
    digits.reverse()
    return "".join(digits) or alphabet[0]
class TimestampUniqifier( object ):
    """Callable that suffixes immediately repeated strings to keep them unique.

    The first occurrence of a value is returned unchanged.  Each directly
    following repeat gets a running counter appended, rendered by
    to_base() over the alphabet `up`: 'A', 'B', ..., 'Z', 'BA', 'BB', ...
    Only the most recent value is remembered.
    """

    def __init__(self):
        self.last = ''
        self.counter = itertools.count()

    def __call__( self, str ):
        """Return `str`, with a suffix appended if it equals the previous call's value."""
        if str == self.last:
            # next(it) instead of it.next(): the builtin exists on both
            # Python 2.6+ and Python 3, the method only on Python 2.
            suf = next(self.counter)
            return str + to_base( suf, up )
        self.last = str
        self.counter = itertools.count()
        return str
timestamp_uniqify = TimestampUniqifier()
usage:
timestamp_uniqify('1')
'1'
timestamp_uniqify('1')
'1A'
timestamp_uniqify('1')
'1B'
timestamp_uniqify('1')
'1C'
timestamp_uniqify('2')
'2'
timestamp_uniqify('3')
'3'
timestamp_uniqify('3')
'3A'
timestamp_uniqify('3')
'3B'
You can call it maaaany times and it will still produce good results:
for i in range(100): print timestamp_uniqify('4')
4
4A
4B
4C
4D
4E
4F
4G
4H
4I
4J
4K
4L
4M
4N
4O
4P
4Q
4R
4S
4T
4U
4V
4W
4X
4Y
4Z
4BA
4BB
4BC
4BD
4BE
4BF
4BG
4BH
4BI
4BJ
4BK
4BL
4BM
4BN
4BO
4BP
4BQ
4BR
4BS
4BT
4BU
4BV
4BW
4BX
4BY
4BZ
4CA
4CB
4CC
4CD
4CE
4CF
4CG
4CH
4CI
4CJ
4CK
4CL
4CM
4CN
4CO
4CP
4CQ
4CR
4CS
4CT
4CU
4CV
4CW
4CX
4CY
4CZ
4DA
4DB
4DC
4DD
4DE
4DF
4DG
4DH
4DI
4DJ
4DK
4DL
4DM
4DN
4DO
4DP
4DQ
4DR
4DS
4DT
4DU
Does the suffix have to be letters like that?
from itertools import count
def unique(timestamp):
    """Return `timestamp` unchanged on first sight, else with '.N' appended.

    N counts up from 0 separately for every distinct timestamp, so repeats
    are detected even when other timestamps were seen in between.  The
    per-timestamp counters live in the function attribute `unique.ts`.
    `timestamp` must be a string, because the suffix is concatenated to it.
    """
    counter = unique.ts.get(timestamp)
    if counter is None:
        unique.ts[timestamp] = count()
        return timestamp
    # next(it) works on Python 2.6+ and 3; it.next() is Python 2 only.
    return timestamp + '.' + str(next(counter))
# Maps each timestamp seen so far to its own itertools.count().
unique.ts = {}
You can define a different count if you want the letters back.
This isn't the same as your perl code, though.
Looking at the problem it seems like a good fit for a coroutine (Python 2.5 or higher). Here's some code that will roughly produce the same result:
def uniqify():
    """Coroutine that hands out a unique id for every value sent to it.

    Prime it once with next(); it yields None.  After that, send(value)
    yields '<index><letters>': <index> is the 1-based order in which the
    value was first seen, and <letters> advances a, b, ..., z, aa, ab, ...
    each time that same value is sent again.  Values must be usable as
    dict keys.
    """
    def letters(n):
        # 0 -> 'a', 25 -> 'z', 26 -> 'aa', 27 -> 'ab', ...
        # Rolls over to a longer suffix instead of running past 'z' into
        # punctuation characters.
        out = []
        n += 1
        while n:
            n, r = divmod(n - 1, 26)
            out.append(chr(ord('a') + r))
        out.reverse()
        return ''.join(out)

    seen = {}
    val = (yield None)
    while True:
        if val in seen:
            idxa, repeats = seen[val]
            repeats += 1
        else:
            idxa, repeats = len(seen) + 1, 0
        seen[val] = (idxa, repeats)
        val = (yield "%s%s" % (idxa, letters(repeats)))
And here's how you use it:
>>> u = send.uniqify()
>>> u.next() #need this to start the generator
>>> u.send(1)
'1a'
>>> u.send(1)
'1b'
>>> u.send(1)
'1c'
>>> u.send(2)
'2a'
>>> u.send(2)
'2b'
>>> u.send(1) #you can go back to previous values
'1d'
>>> u.send('stringy') #you can send it anything that can be used as a dict key
'3a'
This is my first time answering, and I used globals, but it seemed the simplest way to me.
from string import uppercase
last_ts = None
letters = None
def increment(letters):
    """Return the suffix that follows `letters` in A, B, ..., Z, AA, AB, ...

    An empty or None `letters` yields "A".  A trailing "Z" wraps to "A" and
    carries into the preceding letters, growing the string when every
    letter is "Z" ("Z" -> "AA", "ZZ" -> "AAA").

    Raises:
        ValueError: if the last character is not an ASCII uppercase letter.
    """
    # ascii_uppercase exists on both Python 2 and 3 and, unlike
    # string.uppercase, does not depend on the locale.
    from string import ascii_uppercase
    if not letters:
        return "A"
    head, last_letter = letters[:-1], letters[-1]
    if last_letter == "Z":
        return increment(head) + "A"
    return head + ascii_uppercase[ascii_uppercase.index(last_letter) + 1]
def uniquify(timestamp):
    """Return `timestamp`, suffixed with letters when it repeats the last call.

    State is kept in the module globals `last_ts` (the previous timestamp)
    and `letters` (the last suffix handed out, or None).  A new timestamp
    is returned unchanged and resets the suffix; a repeat gets the next
    suffix from increment() appended.  `timestamp` must be a string because
    the suffix is concatenated to it.
    """
    global last_ts, letters
    if timestamp != last_ts:
        # First sighting: remember it and start the suffix sequence over.
        last_ts = timestamp
        letters = None
        return timestamp
    letters = increment(letters)
    return timestamp + letters
# Demo run; print(...) with one argument works on Python 2 and Python 3.
print(uniquify("1"))
print(uniquify('1'))
print(uniquify("1"))
print(uniquify("2"))
for each in range(100):
    print(uniquify("2"))
1
1A
1B
2
2A
2B
2C
2D
2E
2F
2G
2H
2I
2J
2K
2L
2M
2N
2O
2P
2Q
2R
2S
2T
2U
2V
2W
2X
2Y
2Z
2AA
2AB
2AC
2AD
2AE
2AF
2AG
2AH
2AI
2AJ
2AK
2AL
2AM
2AN
2AO
2AP
2AQ
2AR
2AS
2AT
2AU
2AV
2AW
2AX
2AY
2AZ
2BA
2BB
2BC
2BD
2BE
2BF
2BG
2BH
2BI
2BJ
2BK
2BL
2BM
2BN
2BO
2BP
2BQ
2BR
2BS
2BT
2BU
2BV
2BW
2BX
2BY
2BZ
2CA
2CB
2CC
2CD
2CE
2CF
2CG
2CH
2CI
2CJ
2CK
2CL
2CM
2CN
2CO
2CP
2CQ
2CR
2CS
2CT
2CU
2CV