Fastest Bitwise Xor Between Two Multibyte Binary Data Variables

October 17, 2023 Post a Comment

What is the fastest way to implementat the following logic: def xor(data, key): l = len(key) buff = '' for i in range(0, len(data)): buff += chr(ord(data[i]) ^

Solution 1:

Not tested

Don't know if it's faster

supposing that len(mystring) is a multiple of 4

def xor(hash,mystring):
    s = struct.Struct("<L")

    v1 = memoryview(hash)

    tab1 = []
    for i in range(5):
        tab1.append(s.unpack_from(v1,i*4)

    v2 = memoryview(mystring)
    tab2=[]
    for i in range(len(mystring)/4):
        tab2.append(s.unpack_from(v1,i*4))
    tab3 = []
    try:
        for i in range(len(mystring)/20):
            for j in range(5):
               tab3.append(s.pack(tab1[j]^tab2[5*i+j]))
    expect IndexError:
        pass
    return "".join(tab3)

Solution 2:

If len(data) is large, you might see a significant improvement from xrange. Actually, you can replace the range function entirely with enumerate. You might also benefit from using a list instead of appending to a string.

defxor(data, key):
    l = len(key)
    buff = []
    for idx, val inenumerate(data):
        buff.append(chr(ord(val) ^ ord(key[idx % l]))
    return''.join(buff)

I haven't timed it, but off the top of my head I'd expect that to be a bit faster for large amounts of data. Make sure you measure every change.

If profiling suggests that the call to ord() actually takes time, you can run it on all the values in key ahead of time to save a call in the loop.

You could also turn that for loop into a plain old list comprehension, but it will negatively impact readability. Regardless, try it and see if it's way faster.

Solution 3:

This code should work in Python 2.6+ including Py3k.

from binascii import hexlify as _hexlify
from binascii import unhexlify as _unhexlify


defpackl(lnum, padmultiple=0):
    """Packs the lnum (which must be convertable to a long) into a
    byte string 0 padded to a multiple of padmultiple bytes in size. 0
    means no padding whatsoever, so that packing 0 result in an empty
    string.  The resulting byte string is the big-endian two's
    complement representation of the passed in long."""if lnum == 0:
        returnb'\0' * padmultiple
    elif lnum < 0:
        raise ValueError("Can only convert non-negative numbers.")
    s = hex(lnum)[2:]
    s = s.rstrip('L')
    iflen(s) & 1:
        s = '0' + s
    s = _unhexlify(s)
    if (padmultiple != 1) and (padmultiple != 0):
        filled_so_far = len(s) % padmultiple
        if filled_so_far != 0:
            s = b'\0' * (padmultiple - filled_so_far) + s
    return s

defunpackl(bytestr):
    """Treats a byte string as a sequence of base 256 digits
    representing an unsigned integer in big-endian format and converts
    that representation into a Python integer."""returnint(_hexlify(bytestr), 16) iflen(bytestr) > 0else0defxor(data, key):
    dlen = len(data)
    klen = len(key)
    if dlen > klen:
        key = key * ((dlen + klen - 1) // klen)
    key = key[:dlen]
    result = packl(unpackl(data) ^ unpackl(key))
    iflen(result) < dlen:
         result = b'\0' * (dlen - len(result)) + result
    return result

This will also work in Python 2.7 and 3.x. It has the advantage of being a lot simpler than the previous one while doing basically the same thing in approximately the same amount of time:

from binascii import hexlify as _hexlify
from binascii import unhexlify as _unhexlify

defxor(data, key):
    dlen = len(data)
    klen = len(key)
    if dlen > klen:
        key = key * ((dlen + klen - 1) // klen)
    key = key[:dlen]
    data = int(_hexlify(data), 16)
    key = int(_hexlify(key), 16)
    result = (data ^ key) | (1 << (dlen * 8 + 7))
    # Python 2.6/2.7 only lines (comment out in Python 3.x)
    result = memoryview(hex(result))
    result = (result[4:-1] if result[-1] == 'L'else result[4:])
    # Python 3.x line#result = memoryview(hex(result).encode('ascii'))[4:]
    result = _unhexlify(result)
    return result

Solution 4:

Disclaimer:As other posters have said, this is a really bad way to encrypt files. This article demonstrates how to reverse this kind of obfuscation trivially.

first, a simple xor algorithm:

defxor(a,b,_xor8k=lambda a,b:struct.pack("!1000Q",*map(operator.xor,
                    struct.unpack("!1000Q",a),
                    struct.unpack("!1000Q",b)))
        ):
    iflen(a)<=8000:
        s="!%iQ%iB"%divmod(len(a),8)
        return struct.pack(s,*map(operator.xor,
            struct.unpack(s,a),
            struct.unpack(s,b)))
    a=bytearray(a)
    for i inrange(8000,len(a),8000):
        a[i-8000:i]=_xor8k(
            a[i-8000:i],
            b[i-8000:i])
    a[i:]=xor(a[i:],b[i:])
    returnstr(a)

secondly the wrapping xor algorithm:

defxor_wrap(data,key,_struct8k=struct.Struct("!1000Q")):
    l=len(key)
    iflen(data)>=8000:
        keyrpt=key*((7999+2*l)//l)#this buffer is accessed with whatever offset is required for a given 8k block#this expression should create at most 1 more copy of the key than is needed
        data=bytearray(data)
        offset=-8000#initial offset, set to zero on first loop iteration
        modulo=0#offset used to access the repeated keyfor offset inrange(0,len(data)-7999,8000):
            _struct8k.pack_into(data,offset,*map(operator.xor,
                _struct8k.unpack_from(data,offset),
                _struct8k.unpack_from(keyrpt,modulo)))
            modulo+=8000;modulo%=l
        offset+=8000else:offset=0;keyrpt=key*(len(data)//l+1)#simple calculation guaranteed to be enough
    rest=len(data)-offset
    srest=struct.Struct("!%iQ%iB"%divmod(len(data)-offset,8))
    srest.pack_into(data,offset,*map(operator.xor,
        srest.unpack_from(data,offset),
        srest.unpack_from(keyrpt,modulo)))
    return data

Solution 5:

Here's a version that only uses Python built-in and standard modules which seems very fast -- although I haven't compared it to your numpy version. It uses a couple of optimized conversion functions from the Python Cryptography Toolkit as indicated.

# Part of the Python Cryptography Toolkit# found here:# http://www.google.com/codesearch/p?hl=en#Y_gnTlD6ECg/trunk/src/gdata/Crypto/Util/number.py&q=lang:python%20%22def%20long_to_bytes%22&sa=N&cd=1&ct=rc# Improved conversion functions contributed by Barry Warsaw, after# careful benchmarkingimport struct

deflong_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """# after much testing, this algorithm was deemed to be the fastest
    s = ''
    n = long(n)
    pack = struct.pack
    while n > 0:
        s = pack('>I', n & 0xffffffffL) + s
        n = n >> 32# strip off leading zerosfor i inrange(len(s)):
        if s[i] != '\000':
            breakelse:
        # only happens when n == 0
        s = '\000'
        i = 0
    s = s[i:]
    # add back some pad bytes.  this could be done more efficiently w.r.t. the# de-padding being done above, but sigh...if blocksize > 0andlen(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * '\000' + s
    return s

defbytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    acc = 0L
    unpack = struct.unpack
    length = len(s)
    if length % 4:
        extra = (4 - length % 4)
        s = '\000' * extra + s
        length = length + extra
    for i inrange(0, length, 4):
        acc = (acc << 32) + unpack('>I', s[i:i+4])[0]
    return acc


# original code in SO questiondefxor_orig(data, key):
    l = len(key)

    buff = ""for i inrange(0, len(data)):
        buff += chr(ord(data[i]) ^ ord(key[i % l]))
    return buff

# faster pure python versiondefxor_new(data, key):
    import math

    # key multiplication in order to match the data length
    key = (key*int( math.ceil(float(len(data))/float(len(key)))))[:len(data)]

    # convert key and data to long integers
    key_as_long = bytes_to_long(key)
    data_as_long = bytes_to_long(data)

    # xor the numbers together and convert the result back to a byte stringreturn long_to_bytes(data_as_long ^ key_as_long)

if __name__=='__main__':
    import random
    import sha

    TEST_DATA_LEN = 100000

    data = ''.join(chr(random.randint(0, 255)) for i in xrange(TEST_DATA_LEN))
    key = sha.new(data).digest()

    assert xor_new(data, key) == xor_orig(data, key)
    print'done'

Python Guru