How can I get 2.x-like sorting behaviour in Python 3.x?

后端 未结 10 811
陌清茗
陌清茗 2020-11-27 06:50

I'm trying to replicate (and if possible improve on) Python 2.x's sorting behaviour in 3.x, so that mutually orderable types like int, float etc.

相关标签:
10条回答
  • 2020-11-27 07:29

    I tried to implement the Python 2 sorting C code in Python 3 as faithfully as possible.

    Use it like so: mydata.sort(key=py2key()) or mydata.sort(key=py2key(mykeyfunc))

    def default_3way_compare(v, w):  # Yes, this is how Python 2 sorted things :)
        """Three-way compare two values that have no native ordering.

        Rules, applied in order:

        * same type: order by ``id()`` of the two objects;
        * ``None`` sorts before everything else;
        * otherwise order by type name, with ``int``/``float`` instances
          (including subclasses such as ``bool``) using the empty name so
          they come first;
        * equal names but different types: order by ``id()`` of the types.

        Returns:
            ``-1``, ``0`` or ``1``.
        """
        def _sign(left, right):
            # Classic cmp(): -1, 0 or 1.
            return (left > right) - (left < right)

        def _sort_name(value):
            # Numbers get the empty name so that they precede every other type.
            return '' if isinstance(value, (int, float)) else type(value).__name__

        if type(v) is type(w):
            return _sign(id(v), id(w))
        if v is None:
            return -1
        if w is None:
            return 1

        v_name, w_name = _sort_name(v), _sort_name(w)
        if v_name != w_name:
            return -1 if v_name < w_name else 1
        # Distinct types sharing a sort name: fall back to the type identities.
        return -1 if id(type(v)) < id(type(w)) else 1
    
    def py2key(func=None):  # based on cmp_to_key
        """Build a sort-key class that falls back to Python 2 style ordering.

        Instances wrap one value (optionally transformed by ``func``) and
        implement the rich comparisons by first trying the native operator.
        When that raises ``TypeError`` the answer is derived from
        ``default_3way_compare`` instead.

        Args:
            func: Optional one-argument callable applied to every item before
                it is wrapped. ``None`` wraps the items unchanged.

        Returns:
            The wrapper class, for use as ``sorted(data, key=py2key())``.
        """
        import operator

        def _compare(op, left, right):
            # Prefer the native comparison; only unorderable pairs reach the
            # fallback, whose -1/0/1 result is tested against 0 with the
            # same operator.
            try:
                return op(left, right)
            except TypeError:
                return op(default_3way_compare(left, right), 0)

        class K(object):
            __slots__ = ['obj']
            __hash__ = None  # __eq__ is overridden, keep instances unhashable

            def __init__(self, obj):
                # Compare against None rather than relying on truthiness so a
                # callable object that happens to be falsy is still applied.
                self.obj = obj if func is None else func(obj)

            def __lt__(self, other):
                return _compare(operator.lt, self.obj, other.obj)

            def __gt__(self, other):
                return _compare(operator.gt, self.obj, other.obj)

            def __eq__(self, other):
                return _compare(operator.eq, self.obj, other.obj)

            def __le__(self, other):
                return _compare(operator.le, self.obj, other.obj)

            def __ge__(self, other):
                return _compare(operator.ge, self.obj, other.obj)

        return K
    
    0 讨论(0)
  • 2020-11-27 07:30

    This answer aims to faithfully re-create the Python 2 sort order, in Python 3, in every detail.

    The actual Python 2 implementation is quite involved, but object.c's default_3way_compare does the final fallback after instances have been given a chance to implement normal comparison rules. This is after individual types have been given a chance to compare (via the __cmp__ or __lt__ hooks).

    Implementing that function as pure Python in a wrapper, plus emulating the exceptions to the rules (dict and complex numbers specifically) gives us the same Python 2 sorting semantics in Python 3:

    from numbers import Number
    
    
    # decorator for type to function mapping special cases
    def per_type_cmp(type_):
        """Register a "less than" function for pairs of ``type_`` instances.

        Use as ``@per_type_cmp(SomeType)``. The decorated function is stored
        in ``per_type_cmp.mapping`` under ``type_`` and returned unchanged.

        Args:
            type_: The type whose instances the decorated function compares.

        Returns:
            A decorator that records the function and hands it back.
        """
        def decorator(cmpfunc):
            per_type_cmp.mapping[type_] = cmpfunc
            return cmpfunc
        return decorator


    # Create the registry eagerly so code that reads per_type_cmp.mapping
    # works even when no special case has been registered yet.
    per_type_cmp.mapping = {}
    
    
    class python2_sort_key(object):
        """Sort key wrapper whose ``<`` emulates Python 2 ordering.

        Only ``__lt__`` is defined. The wrapped values are compared natively
        first; the emulation rules are used only when that raises
        ``TypeError``.
        """

        # Types for which a failed comparison is re-raised instead of emulated.
        _unhandled_types = {complex}

        def __init__(self, ob):
            self._ob = ob

        def __lt__(self, other):
            unsortable = self._unhandled_types
            # Only the wrapped values matter from here on.
            left, right = self._ob, other._ob

            # The emulation below applies only if direct comparison fails.
            try:
                return left < right
            except TypeError:
                pass

            # Per-type hooks registered through per_type_cmp (e.g. dict, whose
            # dedicated Python 2 __cmp__ is gone in Python 3).
            for type_, special_cmp in per_type_cmp.mapping.items():
                if isinstance(left, type_) and isinstance(right, type_):
                    return special_cmp(left, right)

            # Explicitly raise again for types that won't sort in Python 2
            # either; the left operand is checked first.
            for operand in (left, right):
                if type(operand) in unsortable:
                    raise TypeError('no ordering relation is defined for {}'.format(
                        type(operand).__name__))

            # default_3way_compare from Python 2, expressed as "less than".
            # Same type but no ordering defined: go by object id.
            if type(left) is type(right):
                return id(left) < id(right)

            # None always comes first.
            if left is None:
                return True
            if right is None:
                return False

            # Sort by type name; numbers use '' so they precede other types.
            left_name = '' if isinstance(left, Number) else type(left).__name__
            right_name = '' if isinstance(right, Number) else type(right).__name__

            if left_name == right_name:
                # Same name (or both numbers) but different type objects:
                # order by the id of the type object.
                return id(type(left)) < id(type(right))
            return left_name < right_name
    
    
    @per_type_cmp(dict)
    def dict_cmp(a, b, _s=object()):
        """Return whether dict ``a`` orders before dict ``b``.

        A shorter dict orders first. For equal sizes, each dict's smallest
        key (by ``python2_sort_key``) whose value differs from, or is missing
        in, the other dict is located; those keys are compared and, if they
        are equal, their values are compared instead.

        ``_s`` is a private sentinel standing for "no such key"; callers
        should not pass it.
        """
        size_a, size_b = len(a), len(b)
        if size_a != size_b:
            return size_a < size_b

        mismatched_in_a = (key for key in a if a[key] != b.get(key, _s))
        adiff = min(mismatched_in_a, key=python2_sort_key, default=_s)
        if adiff is _s:
            # All keys in a have a matching value in b, so the dicts are equal
            return False

        mismatched_in_b = (key for key in b if b[key] != a.get(key, _s))
        bdiff = min(mismatched_in_b, key=python2_sort_key)

        if adiff != bdiff:
            left, right = adiff, bdiff
        else:
            # Same deciding key on both sides: the values settle the order.
            left, right = a[adiff], b[bdiff]
        return python2_sort_key(left) < python2_sort_key(right)
    

    I incorporated handling dictionary sorting as implemented in Python 2, since that'd be supported by the type itself via a __cmp__ hook. I've stuck to the Python 2 ordering for the keys and values as well, naturally.

    I've also added special casing for complex numbers, as Python 2 raises an exception when you try to sort these:

    >>> sorted([0.0, 1, (1+0j), False, (2+3j)])
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: no ordering relation is defined for complex numbers
    

    You may have to add more special cases if you want to emulate Python 2 behaviour exactly.

    If you wanted to sort complex numbers anyway you'll need to consistently put them with the non-numbers group; e.g.:

    # Sort by typename, but numbers are sorted before other types
    # (complex is kept out of the numbers group, so it sorts by its type name)
    self_is_number = (
        isinstance(self, Number) and not isinstance(self, complex))
    self_tname = '' if self_is_number else type(self).__name__
    other_is_number = (
        isinstance(other, Number) and not isinstance(other, complex))
    other_tname = '' if other_is_number else type(other).__name__
    

    Some test cases:

    >>> sorted([0, 'one', 2.3, 'four', -5], key=python2_sort_key)
    [-5, 0, 2.3, 'four', 'one']
    >>> sorted([0, 123.4, 5, -6, 7.89], key=python2_sort_key)
    [-6, 0, 5, 7.89, 123.4]
    >>> sorted([{1:2}, {3:4}], key=python2_sort_key)
    [{1: 2}, {3: 4}]
    >>> sorted([{1:2}, None, {3:4}], key=python2_sort_key)
    [None, {1: 2}, {3: 4}]
    
    0 讨论(0)
  • 2020-11-27 07:30

    Not running Python 3 here, but maybe something like this would work. Test to see if doing a "less than" compare on "value" creates an exception and then do "something" to handle that case, like convert it to a string.

    Of course you'd still need more special handling if there are other types in your list that are not the same type but are mutually orderable.

    from numbers import Real
    from decimal import Decimal
    
    def motley(value):
        """Return a ``(type tag, value)`` sort key for mixed-type lists.

        ``Real`` and ``Decimal`` instances share a single tag so they are
        sorted together. A value that cannot be ordered against itself is
        replaced by its ``repr()`` string in the key.
        """
        numeric = Real, Decimal
        typeinfo = numeric if isinstance(value, numeric) else type(value)

        try:
            # Probe only: the result is irrelevant, we just need to know
            # whether "<" is supported for this value.
            _ = value < value
        except TypeError:
            sortable = repr(value)
        else:
            sortable = value

        return repr(typeinfo), sortable
    
    >>> print(sorted([0, 'one', 2.3, 'four', -5, (2+3j), (1-3j)], key=motley))
    [-5, 0, 2.3, (1-3j), (2+3j), 'four', 'one']
    
    0 讨论(0)
  • 2020-11-27 07:30

    To avoid the use of exceptions and going for a type based solution, I came up with this:

    #! /usr/bin/python3
    
    import itertools
    
    def p2Sort(x):
        """Group the items of ``x`` by exact type and sort each group.

        Groups are emitted in the order their type first appears in ``x``.
        A group whose type reports ``NotImplemented`` from ``__gt__`` when
        comparing its first element with itself (e.g. ``complex``) is left
        in input order; every other group is sorted. Different types are
        never mixed, so ints and floats end up in separate groups.

        Args:
            x: Any iterable, possibly empty.

        Returns:
            An iterator over all items, group after group.
        """
        groups = []  # one list per distinct type, in first-seen order
        types = {}   # type -> index of its list in ``groups``
        for item in x:
            item_type = type(item)
            if item_type not in types:
                types[item_type] = len(groups)
                groups.append([])
            groups[types[item_type]].append(item)

        # debugging
        for group in groups:
            print(group)
            for member in group:
                print(type(member))
        #

        for index, group in enumerate(groups):
            probe = group[0]
            # Types without an ordering answer NotImplemented here; sorting
            # them would raise, so keep that group as it came in.
            if probe.__gt__(probe) is NotImplemented:
                continue
            groups[index] = sorted(group)

        return itertools.chain.from_iterable(groups)
    
    # Demo input mixing complex, str, float and int values.
    x = [
        0j, 'one', 2.3, 'four', -5, 3j, 0j, -5.5, 13, 15.3, 'aa', 'zz',
    ]
    print([*p2Sort(x)])
    

    Note that an additional dictionary to hold the different types in list and a type holding variable (notImpl) is needed. Further note, that floats and ints aren't mixed here.

    Output:

    ================================================================================
    05.04.2017 18:27:57
    ~/Desktop/sorter.py
    --------------------------------------------------------------------------------
    [0j, 3j, 0j]
    <class 'complex'>
    <class 'complex'>
    <class 'complex'>
    ['one', 'four', 'aa', 'zz']
    <class 'str'>
    <class 'str'>
    <class 'str'>
    <class 'str'>
    [2.3, -5.5, 15.3]
    <class 'float'>
    <class 'float'>
    <class 'float'>
    [-5, 13]
    <class 'int'>
    <class 'int'>
    [0j, 3j, 0j, 'aa', 'four', 'one', 'zz', -5.5, 2.3, 15.3, -5, 13]
    
    0 讨论(0)
提交回复
热议问题