Fastest Bitwise Xor Between Two Multibyte Binary Data Variables
Solution 1:
This is untested, and I don't know whether it is actually faster.
It assumes that len(mystring) is a multiple of 4 and that hash holds at least 20 bytes (five 32-bit words).
def xor(hash, mystring):
    """XOR ``mystring`` with the first 20 bytes of ``hash``, repeated as a key.

    Both arguments are treated as sequences of little-endian 32-bit words,
    so the work is done four bytes at a time instead of byte by byte.

    Args:
        hash: Bytes-like key; its first 20 bytes (five words) are used.
        mystring: Bytes-like data whose length is a multiple of 4.

    Returns:
        A byte string of the same length as ``mystring``.

    Raises:
        ValueError: If ``len(mystring)`` is not a multiple of 4.
        struct.error: If ``hash`` holds fewer than 20 bytes.
    """
    if len(mystring) % 4:
        raise ValueError("len(mystring) must be a multiple of 4")
    word_count = len(mystring) // 4
    key_words = struct.unpack_from("<5L", hash)
    data_format = "<%dL" % word_count
    data_words = struct.unpack(data_format, mystring)
    # The key words wrap around every five words (20 bytes), so data that is
    # not a whole number of 20-byte blocks is still processed to the end.
    mixed = [value ^ key_words[index % 5]
             for index, value in enumerate(data_words)]
    return struct.pack(data_format, *mixed)
Solution 2:
If len(data) is large, you might see a significant improvement from using xrange instead of range. Actually, you can replace the range function entirely with enumerate. You might also benefit from collecting the characters in a list and joining them at the end instead of appending to a string.
def xor(data, key):
    """XOR ``data`` with ``key``, repeating the key as often as needed.

    Args:
        data: Byte string (or text string) to transform.
        key: Non-empty key of the same kind as ``data``.

    Returns:
        A string of the same kind and length as ``data``; a bytearray
        input yields a byte string.

    Raises:
        ZeroDivisionError: If ``key`` is empty and ``data`` is not.
    """
    key_len = len(key)
    if isinstance(data, (bytes, bytearray)):
        # Byte strings: bytearray yields integers on every Python version,
        # so no per-character ord()/chr() round trip is needed.
        key_codes = bytearray(key)
        mixed = bytearray(
            value ^ key_codes[idx % key_len]
            for idx, value in enumerate(bytearray(data))
        )
        return bytes(mixed)
    # Text strings: convert the key once, outside the loop.
    key_codes = [ord(ch) for ch in key]
    return ''.join(
        [chr(ord(ch) ^ key_codes[idx % key_len]) for idx, ch in enumerate(data)]
    )
I haven't timed it, but off the top of my head I'd expect that to be a bit faster for large amounts of data. Make sure you measure every change.
If profiling suggests that the call to ord() actually takes time, you can run it on all the values in key ahead of time to save a call in the loop.
You could also turn that for loop into a plain old list comprehension, but it will negatively impact readability. Regardless, try it and see if it's way faster.
Solution 3:
This code should work in Python 2.6+ including Py3k.
from binascii import hexlify as _hexlify
from binascii import unhexlify as _unhexlify
def packl(lnum, padmultiple=0):
    """Pack a non-negative integer into a big-endian byte string.

    Args:
        lnum: The non-negative integer to pack.
        padmultiple: If greater than 1, the result is left-padded with zero
            bytes to a multiple of this many bytes. 0 (or 1) means no
            padding, so packing 0 with ``padmultiple=0`` gives an empty
            byte string.

    Returns:
        The unsigned big-endian representation of ``lnum``.

    Raises:
        ValueError: If ``lnum`` is negative.
    """
    if lnum == 0:
        return b'\0' * padmultiple
    elif lnum < 0:
        raise ValueError("Can only convert non-negative numbers.")
    # '%x' never emits the '0x' prefix or the Python 2 'L' suffix.
    s = '%x' % lnum
    if len(s) & 1:
        # unhexlify needs an even number of hex digits.
        s = '0' + s
    s = _unhexlify(s)
    if (padmultiple != 1) and (padmultiple != 0):
        filled_so_far = len(s) % padmultiple
        if filled_so_far != 0:
            s = b'\0' * (padmultiple - filled_so_far) + s
    return s
def unpackl(bytestr):
    """Convert a big-endian byte string to a non-negative integer.

    The byte string is read as a sequence of base-256 digits, most
    significant first. An empty byte string converts to 0.
    """
    return int(_hexlify(bytestr), 16) if len(bytestr) > 0 else 0


def xor(data, key):
    """XOR ``data`` with ``key``, repeating or truncating the key to fit.

    Both byte strings are converted to integers, XORed in a single
    operation and converted back.

    Args:
        data: Byte string to transform.
        key: Non-empty byte string used as the key.

    Returns:
        A byte string of the same length as ``data``.

    Raises:
        ZeroDivisionError: If ``key`` is empty and ``data`` is not.
    """
    dlen = len(data)
    klen = len(key)
    if dlen > klen:
        # Ceiling division: enough copies of the key to cover the data.
        key = key * ((dlen + klen - 1) // klen)
    key = key[:dlen]
    result = packl(unpackl(data) ^ unpackl(key))
    if len(result) < dlen:
        # The integer round trip drops leading zero bytes; restore them.
        result = b'\0' * (dlen - len(result)) + result
    return result
This will also work in Python 2.7 and 3.x. It has the advantage of being a lot simpler than the previous one while doing basically the same thing in approximately the same amount of time:
from binascii import hexlify as _hexlify
from binascii import unhexlify as _unhexlify
def xor(data, key):
    """XOR ``data`` with ``key``, repeating or truncating the key to fit.

    Works unchanged on Python 2.6+ and Python 3.

    Args:
        data: Byte string to transform.
        key: Non-empty byte string used as the key.

    Returns:
        A byte string of the same length as ``data``.

    Raises:
        ZeroDivisionError: If ``key`` is empty and ``data`` is not.
    """
    dlen = len(data)
    if dlen == 0:
        # int('', 16) would raise ValueError; nothing to XOR anyway.
        return b''
    klen = len(key)
    if dlen > klen:
        key = key * ((dlen + klen - 1) // klen)
    key = key[:dlen]
    data_int = int(_hexlify(data), 16)
    key_int = int(_hexlify(key), 16)
    # Set a sentinel bit above the payload so hex() always yields '0x80'
    # followed by exactly 2 * dlen digits; leading zero bytes then survive.
    result = (data_int ^ key_int) | (1 << (dlen * 8 + 7))
    # Drop the '0x80' prefix and, on Python 2, the trailing 'L'.
    digits = hex(result)[4:].rstrip('L')
    return _unhexlify(digits.encode('ascii'))
Solution 4:
Disclaimer: As other posters have said, this is a really bad way to encrypt files. This article demonstrates how to reverse this kind of obfuscation trivially.
first, a simple xor algorithm:
def xor(a, b, _xor8k=lambda a, b: struct.pack(
        "!1000Q",
        *map(operator.xor,
             struct.unpack("!1000Q", a),
             struct.unpack("!1000Q", b)))):
    """XOR two equal-length byte strings, eight bytes at a time.

    Inputs of up to 8000 bytes are handled with one struct round trip.
    Longer inputs are processed in 8000-byte blocks inside a bytearray, and
    the final partial (or full) block is handled by a recursive call.

    Args:
        a: Byte string to transform.
        b: Byte string of the same length as ``a``.
        _xor8k: Internal helper that XORs two 8000-byte blocks; callers
            should leave the default in place.

    Returns:
        A byte string of the same length as ``a``.

    Raises:
        struct.error: If a block of ``b`` does not match the size of the
            corresponding block of ``a``.
    """
    if len(a) <= 8000:
        # As many 64-bit words as fit, then the leftover single bytes.
        s = "!%iQ%iB" % divmod(len(a), 8)
        return struct.pack(s, *map(operator.xor,
                                   struct.unpack(s, a),
                                   struct.unpack(s, b)))
    a = bytearray(a)
    for i in range(8000, len(a), 8000):
        a[i - 8000:i] = _xor8k(a[i - 8000:i], b[i - 8000:i])
    # ``i`` now marks the start of the last block (1..8000 bytes long).
    a[i:] = xor(a[i:], b[i:])
    # bytes() rather than str(): on Python 3 str(bytearray) is the repr.
    return bytes(a)
secondly the wrapping xor algorithm:
def xor_wrap(data, key, _struct8k=struct.Struct("!1000Q")):
    """XOR ``data`` with ``key``, wrapping the key around as needed.

    Data of 8000 bytes or more is processed in place in 8000-byte blocks;
    whatever remains (or all of the data, when it is shorter than 8000
    bytes) is handled by one final struct round trip.

    Args:
        data: Byte string to transform.
        key: Non-empty byte string used as the repeating key.
        _struct8k: Precompiled struct for one 8000-byte block; callers
            should leave the default in place.

    Returns:
        A bytearray of the same length as ``data``.

    Raises:
        ZeroDivisionError: If ``key`` is empty.
    """
    l = len(key)
    # pack_into needs a writable buffer on both branches.
    data = bytearray(data)
    # Offset into the repeated key at which the current block starts.
    modulo = 0
    if len(data) >= 8000:
        # This buffer is accessed with whatever offset is required for a
        # given 8k block; the expression creates at most one more copy of
        # the key than is needed.
        keyrpt = key * ((7999 + 2 * l) // l)
        offset = 0
        for offset in range(0, len(data) - 7999, 8000):
            _struct8k.pack_into(data, offset, *map(
                operator.xor,
                _struct8k.unpack_from(data, offset),
                _struct8k.unpack_from(keyrpt, modulo)))
            modulo += 8000
            modulo %= l
        # Step past the last full block that was processed.
        offset += 8000
    else:
        offset = 0
        # Simple calculation guaranteed to be enough.
        keyrpt = key * (len(data) // l + 1)
    rest = len(data) - offset
    if rest:
        srest = struct.Struct("!%iQ%iB" % divmod(rest, 8))
        srest.pack_into(data, offset, *map(
            operator.xor,
            srest.unpack_from(data, offset),
            srest.unpack_from(keyrpt, modulo)))
    return data
Solution 5:
Here's a version that only uses Python built-in and standard modules which seems very fast -- although I haven't compared it to your numpy version. It uses a couple of optimized conversion functions from the Python Cryptography Toolkit as indicated.
# Part of the Python Cryptography Toolkit
# found here:
# http://www.google.com/codesearch/p?hl=en#Y_gnTlD6ECg/trunk/src/gdata/Crypto/Util/number.py&q=lang:python%20%22def%20long_to_bytes%22&sa=N&cd=1&ct=rc
# Improved conversion functions contributed by Barry Warsaw, after
# careful benchmarking
import struct
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string

    Convert a long integer to a big-endian byte string.

    If optional blocksize is given and greater than zero, pad the front of
    the byte string with binary zeros so that the length is a multiple of
    blocksize.

    Zero converts to a single zero byte (before padding); so does any
    negative value, because only the magnitude loop for n > 0 emits bytes.
    """
    n = int(n)
    pack = struct.pack
    # Collect 32-bit words least significant first, then reverse once;
    # prepending to a string on every iteration would be quadratic.
    words = []
    while n > 0:
        words.append(pack('>I', n & 0xffffffff))
        n = n >> 32
    words.reverse()
    # strip off leading zeros
    s = b''.join(words).lstrip(b'\000')
    if not s:
        # only happens when n <= 0
        s = b'\000'
    # add back some pad bytes
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
def bytes_to_long(s):
    """bytes_to_long(string) : long

    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes(). An empty byte
    string converts to 0.
    """
    acc = 0
    length = len(s)
    if length % 4:
        # Left-pad with zero bytes to a whole number of 32-bit words.
        extra = (4 - length % 4)
        s = b'\000' * extra + s
        length = length + extra
    # Decode every word in one call instead of slicing per iteration.
    for word in struct.unpack('>%dI' % (length // 4), s):
        acc = (acc << 32) + word
    return acc
# original code in SO question
def xor_orig(data, key):
    """XOR ``data`` with ``key`` one element at a time, repeating the key.

    This is the straightforward reference implementation that the faster
    variant is checked against.

    Args:
        data: Byte string (or text string) to transform.
        key: Non-empty key of the same kind as ``data``.

    Returns:
        A string of the same kind and length as ``data``; a bytearray
        input yields a byte string.

    Raises:
        ZeroDivisionError: If ``key`` is empty and ``data`` is not.
    """
    l = len(key)
    if isinstance(data, (bytes, bytearray)):
        # bytearray yields integers on every Python version.
        key_codes = bytearray(key)
        values = bytearray(data)
        return bytes(bytearray(
            values[i] ^ key_codes[i % l] for i in range(len(values))
        ))
    # Join once at the end; repeated += on a string can be quadratic.
    return "".join(
        [chr(ord(data[i]) ^ ord(key[i % l])) for i in range(len(data))]
    )
# faster pure python version
def xor_new(data, key):
    """XOR ``data`` with ``key`` via one big-integer operation.

    The key is repeated and truncated to the length of the data, both are
    converted to long integers, XORed, and converted back.

    Args:
        data: Byte string to transform.
        key: Non-empty byte string used as the repeating key.

    Returns:
        A byte string of the same length as ``data``.

    Raises:
        ZeroDivisionError: If ``key`` is empty and ``data`` is not.
    """
    data_len = len(data)
    if data_len == 0:
        # long_to_bytes(0) would return a single zero byte.
        return b''
    # key multiplication in order to match the data length
    # (integer ceiling division; no float rounding on very long inputs)
    repeats = -(-data_len // len(key))
    key = (key * repeats)[:data_len]
    # convert key and data to long integers
    key_as_long = bytes_to_long(key)
    data_as_long = bytes_to_long(data)
    # xor the numbers together and convert the result back to a byte
    # string; padding to data_len restores any leading zero bytes that the
    # integer round trip would otherwise drop
    return long_to_bytes(data_as_long ^ key_as_long, data_len)
if __name__ == '__main__':
    # Self-check: the fast implementation must agree with the reference
    # implementation on random data keyed by the data's own SHA-1 digest.
    import hashlib
    import random

    TEST_DATA_LEN = 100000
    data = bytes(bytearray(
        random.randint(0, 255) for _ in range(TEST_DATA_LEN)
    ))
    # hashlib.sha1 replaces the deprecated ``sha`` module.
    key = hashlib.sha1(data).digest()
    assert xor_new(data, key) == xor_orig(data, key)
    print('done')
Post a Comment for "Fastest Bitwise Xor Between Two Multibyte Binary Data Variables"