Is there any library for deserializing, in Python, data that was serialized with Java?

前端 未结 7 1291
野性不改
野性不改 2021-01-04 09:49

Is there any library for deserializing, in Python, data that was serialized with Java?

7条回答
  •  孤城傲影
    2021-01-04 10:42

    Here's a very, very rough implementation that doesn't require any imports. The tricky part is that some classes have custom serializers, which require the original code in order to be deserialized. Also, my particular file was gzipped, though I didn't include code for that. I used these two pages for reference:

    http://www.javaworld.com/community/node/2915 http://docs.oracle.com/javase/7/docs/platform/serialization/spec/protocol.html

    #!/usr/bin/python
    
    def parse(f):
        """Parse a Java Object Serialization stream into plain Python objects.

        Only a subset of the protocol is understood: TC_NULL, TC_REFERENCE,
        TC_STRING, TC_ARRAY (of byte or int), TC_ENUM, TC_CLASSDESC and
        TC_OBJECT, plus the custom block data written by HashMap, Hashtable,
        HashSet and ArrayList.

        Args:
            f: Binary file-like object positioned at the start of the stream.
                If it exposes a ``buffer`` attribute (as Python 3's
                ``sys.stdin`` does), that underlying byte stream is read.

        Returns:
            A list with one entry per top-level object in the stream. Java
            objects become dicts holding ``_cls`` (the class description),
            ``_name`` (the short class name), one key per field and, for the
            supported collections, ``data``. Strings become text, null becomes
            ``None``. Integer fields are signed. Float and double fields are
            left as a hex dump of their raw bytes. Raw byte values (class
            flags, byte fields, array elements) are returned as text with one
            character per byte so the result can be dumped as JSON.

        Raises:
            AssertionError: If the stream contains anything this parser does
                not support or expect.
        """
        # Python 3 text wrappers expose the real byte stream as `.buffer`.
        f = getattr(f, 'buffer', f)

        class EndOfStream(Exception):
            """Internal signal: no more data where a new object would start."""

        def h(s):  # format as hex
            return ' '.join('%.2X' % x for x in bytearray(s))

        def p(s):  # parse unsigned big-endian integer
            value = 0
            for byte in bytearray(s):
                value = (value << 8) | byte
            return value

        def signed(s):  # parse two's-complement big-endian integer
            value = p(s)
            bits = 8 * len(s)
            if bits and value >= 1 << (bits - 1):
                value -= 1 << bits
            return value

        def latin(s):  # raw bytes -> text, one character per byte
            return s.decode('latin-1')

        def read_utf():  # 2-byte length followed by that many UTF-8 bytes
            return f.read(p(f.read(2))).decode('utf-8')

        magic = f.read(2)
        if magic != b'\xAC\xED':  # STREAM_MAGIC
            raise AssertionError(h(magic))
        # Read outside of any `assert` so the bytes are consumed under -O too.
        version = p(f.read(2))
        if version != 5:  # STREAM_VERSION
            raise AssertionError(version)

        array_widths = {'[B': 1, '[I': 4}  # bytes per element
        handles = []  # (kind, value) pairs, indexed by wire handle order

        def parse_obj():
            b = f.read(1)
            if not b:
                raise EndOfStream
            if b == b'\x70':  # p TC_NULL
                return None
            elif b == b'\x71':  # q TC_REFERENCE
                handle = p(f.read(4)) - 0x7E0000  # baseWireHandle
                return handles[handle][1]
            elif b == b'\x74':  # t TC_STRING
                string = read_utf()
                handles.append(('TC_STRING', string))
                return string
            elif b == b'\x75':  # u TC_ARRAY
                data = []
                cls = parse_obj()
                size = p(f.read(4))
                handles.append(('TC_ARRAY', data))
                if cls['_name'] not in array_widths:
                    raise AssertionError((cls['_name'], size, f.read(50)))
                width = array_widths[cls['_name']]
                for _ in range(size):
                    data.append(latin(f.read(width)))
                return data
            elif b == b'\x7E':  # ~ TC_ENUM
                enum = {}
                enum['_cls'] = parse_obj()
                handles.append(('TC_ENUM', enum))
                enum['_name'] = parse_obj()
                return enum
            elif b == b'\x72':  # r TC_CLASSDESC
                cls = {'fields': []}
                full_name = read_utf()
                cls['_name'] = full_name.split('.')[-1]  # drop the package path
                f.read(8)  # serialVersionUID, unused
                raw_flags = f.read(1)
                cls['flags'] = latin(raw_flags)
                handles.append(('TC_CLASSDESC', cls))
                if cls['flags'] not in ('\2', '\3', '\x0C', '\x12'):
                    raise AssertionError(h(raw_flags))
                for _ in range(p(f.read(2))):
                    typ = latin(f.read(1))
                    name = read_utf()
                    # Object and array fields carry a type string as an object.
                    fcls = parse_obj() if typ in 'L[' else ''
                    cls['fields'].append((name, typ, fcls.split('/')[-1]))
                b = f.read(1)
                if b != b'\x78':  # TC_ENDBLOCKDATA closes the annotations
                    raise AssertionError(h(b))
                cls['parent'] = parse_obj()
                return cls
            # TC_OBJECT
            if b != b'\x73':
                raise AssertionError((h(b), h(f.read(4)), f.read(50)))
            obj = {}
            obj['_cls'] = parse_obj()
            obj['_name'] = obj['_cls']['_name']
            handle = len(handles)
            # Field data is written superclass-first, so walk up then reverse.
            parents = [obj['_cls']]
            while parents[-1]['parent']:
                parents.append(parents[-1]['parent'])
            parents.reverse()
            handles.append(('TC_OBJECT', obj))
            for cls in parents:
                fields = cls['fields'] if cls['flags'] in ('\2', '\3') else []
                for name, typ, fcls in fields:
                    if typ == 'I':  # int, signed 32-bit
                        obj[name] = signed(f.read(4))
                    elif typ == 'S':  # short, signed 16-bit
                        obj[name] = signed(f.read(2))
                    elif typ == 'J':  # long, signed 64-bit
                        obj[name] = signed(f.read(8))
                    elif typ == 'Z':  # boolean
                        b = f.read(1)
                        if p(b) not in (0, 1):
                            raise AssertionError(h(b))
                        obj[name] = bool(p(b))
                    elif typ == 'F':  # float, kept as hex of the raw bytes
                        obj[name] = h(f.read(4))
                    elif typ == 'D':  # double, kept as hex of the raw bytes
                        obj[name] = h(f.read(8))
                    elif typ == 'B':  # byte
                        obj[name] = latin(f.read(1))
                    elif typ == 'C':  # char is a 2-byte UTF-16 code unit
                        obj[name] = f.read(2).decode('utf-16-be', 'replace')
                    elif typ in 'L[':  # object, array
                        obj[name] = parse_obj()
                    else:  # unknown
                        raise AssertionError((name, typ, fcls))
                if cls['flags'] in ('\3', '\x0C'):  # SC_WRITE_METHOD, SC_BLOCKDATA
                    b = f.read(1)
                    if b == b'\x77':  # data written by the class's writeObject
                        block = f.read(p(f.read(1)))
                        if cls['_name'].endswith(('HashMap', 'Hashtable')):
                            # http://javasourcecode.org/html/open-source/jdk/jdk-6u23/java/util/HashMap.java.html
                            # http://javasourcecode.org/html/open-source/jdk/jdk-6u23/java/util/Hashtable.java.html
                            if len(block) != 8:
                                raise AssertionError(h(block))
                            size = p(block[4:])
                            obj['data'] = []  # pairs first: keys may be unhashable
                            for _ in range(size):
                                k = parse_obj()
                                v = parse_obj()
                                obj['data'].append((k, v))
                            try:
                                obj['data'] = dict(obj['data'])
                            except TypeError:
                                pass  # non-hashable keys: keep the pair list
                        elif cls['_name'].endswith('HashSet'):
                            # http://javasourcecode.org/html/open-source/jdk/jdk-6u23/java/util/HashSet.java.html
                            if len(block) != 12:
                                raise AssertionError(h(block))
                            size = p(block[-4:])
                            obj['data'] = []
                            for _ in range(size):
                                obj['data'].append(parse_obj())
                        elif cls['_name'].endswith('ArrayList'):
                            # http://javasourcecode.org/html/open-source/jdk/jdk-6u23/java/util/ArrayList.java.html
                            if len(block) != 4:
                                raise AssertionError(h(block))
                            obj['data'] = []
                            for _ in range(obj['size']):
                                obj['data'].append(parse_obj())
                        else:
                            raise AssertionError(cls['_name'])
                        b = f.read(1)
                    if b != b'\x78':  # TC_ENDBLOCKDATA
                        raise AssertionError(h(b) + ' ' + repr(f.read(50)))
            handles[handle] = ('py', obj)
            return obj

        objs = []
        while True:
            try:
                objs.append(parse_obj())
            except EndOfStream:
                return objs
    
    if __name__ == '__main__':
        # Script entry point: read a serialized Java stream from standard
        # input and write the parsed objects to standard output as JSON.
        import json
        import sys

        # The stream is binary. On Python 3, sys.stdin is a text wrapper whose
        # raw bytes are available as sys.stdin.buffer; on Python 2 there is no
        # such attribute and sys.stdin is used directly.
        stream = getattr(sys.stdin, 'buffer', sys.stdin)
        json.dump(parse(stream), sys.stdout, indent=2)
    

提交回复
热议问题