#!/bin/env python #********************************************************************** # Copyright (c) 2003 Karl Auerbach, Santa Cruz, California, USA * # All Rights Reserved * # * # Permission to use, copy, modify, and distribute this software and * # its documentation for any purpose and without fee is hereby * # granted, provided that the above copyright notice appear in all * # copies and that both that copyright notice and this permission * # notice appear in supporting documentation, and that the name of * # Karl Auerbach not be used in advertising or publicity pertaining to * # distribution of the software without specific, written prior * # permission. * # * # KARL AUERBACH DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS * # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND * # FITNESS, IN NO EVENT SHALL KARL AUERBACH BE LIABLE FOR ANY SPECIAL, * # INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER * # RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION * # OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR * # IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * #********************************************************************** # The above license is the same as the Python license but with the * # date and name of the author(s)/licensor(s) changed. 
# See http://www.python.org/doc/Copyright.html                        *
#**********************************************************************

#**********************************************************************
# Please report bugs to the following e-mail address, which has been  *
# somewhat obfuscated to ward off spambots - you can cope:            *
#        karl + keyword + cblib.b11f77 at cavebear.com                *
#**********************************************************************

#**********************************************************************
# This file originally came from http://www.cavebear.com/cblib/       *
#**********************************************************************

#**********************************************************************
# $Id: ka_guidgen.py,v 0.2 2004/04/13 06:24:24 karl Exp $
#
# $Log: ka_guidgen.py,v $
# Revision 0.2  2004/04/13 06:24:24  karl
# Ran through pychecker - not perfect, but improved.
#
# Revision 0.1  2004/03/18 23:40:45  karl
# Initial rcs check in.
#
#**********************************************************************

import random
import socket
import time
from types import *


def _ka_local_ipv4_word(rangen):
    """Return 32 bits that identify this host for use in a GUID.

    The value is the IPv4 address that the local host name resolves to,
    packed big-endian into one integer.  If the address cannot be
    obtained for any reason, a random address in the IPv4 multicast
    range (0xE0000000 plus 28 random bits drawn from *rangen*) is
    returned instead.
    """
    try:
        packed = socket.inet_aton(socket.gethostbyname(socket.gethostname()))
    except Exception:
        # Deliberate best effort: this runs at import time, so any lookup
        # failure falls back to a brewed-up address rather than aborting.
        # (Unlike a bare "except:", this lets KeyboardInterrupt through.)
        return 0xE0000000 + rangen.randint(0, 0x0FFFFFFF)
    word = 0
    for octet in packed:
        # Iterating the packed address yields 1-character strings on
        # Python 2 but integers on Python 3.
        if not isinstance(octet, int):
            octet = ord(octet)
        word = (word << 8) | octet
    return word


class ka_GUID(object):
    """GUID Generator

    A GUID is a 128 bit (16 octet) identifier that is, hopefully, globally
    unique.

    GUIDs are traditionally presented in a number of formats.  We don't
    want to get into that here.  So we mainly deal with an internal format
    and can add methods to generate external formats as desired.

    str() will return a 32 character string containing the hexadecimal
    representation of those 16 octets/128 bits of the GUID.

    The internal format here is simply a sequence of 4 32-bit integers.

    Also available is a string format of 32 characters representing the
    32 hexadecimal digits of those 128 bits.

    Ideally we want to base the GUID on something likely to make the
    result unique.  MAC addresses are good for this.  However Python
    doesn't seem to have a well established method for getting MAC
    addresses.

    In the meantime we will use the IP address of the first interface -
    this is not necessarily a worldwide unique value (darn those NATs!).
    Nor does a machine always have an IP address - in which case we'll
    brew one up.

    If we do have to create an address, we want to minimize the chance of
    collision - the chance is small, but hey, why add to the risk?  So
    we'll take the IP multicast space (224.x.x.x through 239.x.x.x) and
    build a random address in that space, or 224.x.x.x with a /4 prefix,
    in other words 0xE0.x.x.x with a mask of 0xF0.00.00.00

    The IPv4 address gives us 32 bits, but since hope springs eternal
    we'll leave room for future good things.  MAC addresses are presently
    48 bits - but the IEEE is moving to 64 bits.  So we'll pad the 32 bits
    we get from the IP address with 32 random bits.

    You might ask about IPv6 - will it ever happen?

    A timestamp adds more differentiation.  However under portable Python
    we can't really count on better than one second resolution, sigh.
    The time.time() function seems to give at least milliseconds on Linux.
    So we'll take whatever time.time() gives us and use it as a 48-bit
    number.  (Some juggling will be needed to do this.)

    OK, with 64 bits from the IP+random and 48 bits from the clock we've
    covered 112 bits.  We need another 16.  We'll fill that with random
    bits.

    OK, how do we lay this out?  The choice could be arbitrary.  However,
    I am supposing that these GUID's may be used as keys of some kind.
    In some databases, such as MySQL, one might build an index using only
    the first N characters of the GUID.  Since the IP address/MAC part is
    likely to be invariant on a given machine, I'm moving that to the
    right end and pushing the more variable parts toward the front.

    So here's my chosen layout, each position represents an 8-bit octet:

        FFrrSSSSIIIIRRRR
        1234567890123456

    F is an octet derived from the time (fractional seconds)
    r is a random octet.
    S is an octet derived from the time (in seconds)
    I is an octet derived from the IPv4 address (real or imagined)
    R is a random octet used to pad the IP address out to 64 bits

    In terms of the 4 32-bit words we use to hold things:

        Word 0: FFrr
        Word 1: SSSS
        Word 2: IIII
        Word 3: RRRR

    In terms of the string format returned by str() we have:

        FFFFrrrrSSSSSSSSIIIIIIIIRRRRRRRR
        0         1         2         3
        01234567890123456789012345678901

    There is a fair amount of sort-of randomness in the FFrr part,
    particularly if the GUIDs were generated over a period of time - so
    you might choose to use FFrr directly as a hash key.

    An argument can be made that we ought to generate the RRRR random
    value once so that the IIIIRRRR part is fixed.  But that places a
    burden on the "rr" random number to differentiate things (see the
    "Concern" below).  So, just to be conservative we'll generate both
    RRRR and rr on every GUID.

    Concern: If the following conditions conjoined the above format
    could be inadequate:

        64 bit MAC addresses are a reality and available to Python
        (i.e. the "R" field above is no longer random)

        The time resolution is bad (like one second)

        The computer is really, really fast so that it's possible to
        generate more than 64K (16-bits) worth of GUIDs in a single
        time resolution unit.

    All of this is highly unlikely here in May 2003.

    Members of the class itself:

        __RanGen - Our own private random number generator instance
                   (we don't want other folks messin' with our random
                   generator.)

        __IP_Int - The 32 bits we got from the IP address, i.e. IIII.

    Members of each instance:

        __GUID_Words - A tuple of 4 integers that together form the
                       128 bit GUID.  The RRRR word is drawn from a
                       signed range and so may be negative; the string
                       formats use only the low 32 bits of each word.
    """

    #******************************
    # Begin class initialization
    #******************************
    __RanGen = random.Random()
    __IP_Int = _ka_local_ipv4_word(__RanGen)
    #******************************
    # End of class initialization
    #******************************

    #**********************************************************************
    # Instance initializer
    #**********************************************************************
    def __init__(self, guid=None):
        """Create a new GUID, or copy an existing one.

        If *guid* is a ka_GUID (or an instance of a subclass) its value
        is copied.  For any other argument, including None, the argument
        is ignored and a fresh GUID is generated from the current time,
        the class-wide IP word and two random fields.
        """
        if isinstance(guid, ka_GUID):
            # Copy constructor - the tuple is immutable so sharing is safe.
            self.__GUID_Words = guid.__GUID_Words
            return

        time_now = time.time()
        time_int_part = int(time_now)
        # Do a 16 bit left shift the hard way... because we want those
        # fractional bits in the floating point number.
        time_frac_part = int((time_now * 65536.0) % 65536.0)

        rangen = self.__class__.__RanGen
        # The rr field is drawn before the RRRR field.  The RRRR range is
        # the signed 32-bit range with the upper bound reduced by one, as
        # in the original implementation.
        self.__GUID_Words = (
            (time_frac_part << 16) | rangen.randint(0, 0x0000FFFF),
            time_int_part,
            self.__class__.__IP_Int,
            rangen.randint(-2147483648, 2147483646),
        )

    #**********************************************************************
    # __hash__
    #**********************************************************************
    def __hash__(self):
        """Hash on word 0 (the FFrr field), the most variable part."""
        return self.__GUID_Words[0]

    #**********************************************************************
    # __cmp__
    #**********************************************************************
    def __cmp__(self, other):
        """Python 2 three-way comparison: word by word, first to last.

        Returns 1, -1 or 0.  Raises AttributeError if *other* does not
        carry GUID words.
        """
        mine = self.__GUID_Words
        theirs = other.__GUID_Words
        return (mine > theirs) - (mine < theirs)

    #**********************************************************************
    # __eq__
    #**********************************************************************
    def __eq__(self, other):
        """Return True when *other* holds the same four words.

        Anything that does not carry GUID words compares unequal.
        """
        try:
            return self.__GUID_Words == other.__GUID_Words
        except AttributeError:
            return False

    #**********************************************************************
    # __ne__
    #**********************************************************************
    def __ne__(self, other):
        """Return the negation of __eq__."""
        return not self.__eq__(other)

    #**********************************************************************
    # __ge__
    # __gt__
    # __le__
    # __lt__
    #
    # Python 3 never calls __cmp__, so the ordering is spelled out here.
    # The results agree with __cmp__: the words are compared first to last.
    #**********************************************************************
    def __ge__(self, other):
        """Return True when this GUID orders at or after *other*."""
        if not isinstance(other, ka_GUID):
            return NotImplemented
        return self.__GUID_Words >= other.__GUID_Words

    def __gt__(self, other):
        """Return True when this GUID orders after *other*."""
        if not isinstance(other, ka_GUID):
            return NotImplemented
        return self.__GUID_Words > other.__GUID_Words

    def __le__(self, other):
        """Return True when this GUID orders at or before *other*."""
        if not isinstance(other, ka_GUID):
            return NotImplemented
        return self.__GUID_Words <= other.__GUID_Words

    def __lt__(self, other):
        """Return True when this GUID orders before *other*."""
        if not isinstance(other, ka_GUID):
            return NotImplemented
        return self.__GUID_Words < other.__GUID_Words

    #**********************************************************************
    # __nonzero__
    #**********************************************************************
    def __nonzero__(self):
        """A GUID is always true."""
        return True

    # Python 3 spelling of __nonzero__.
    __bool__ = __nonzero__

    #**********************************************************************
    # __repr__
    #**********************************************************************
    def __repr__(self):
        """Return a debugging representation showing the plain format."""
        return "<ka_GUID %s>" % self.GetPlainFormat()

    #**********************************************************************
    # Support the standard str() function
    #**********************************************************************
    def __str__(self):
        """Return the GUID as exactly 32 upper-case hexadecimal digits."""
        # Each word is reduced to its low 32 bits first.  "%08X" applied
        # to a negative word would otherwise emit a minus sign (and up to
        # nine characters), which breaks every fixed-position format below.
        return "%08X%08X%08X%08X" % tuple(
            [word & 0xFFFFFFFF for word in self.__GUID_Words])

    #**********************************************************************
    # Obtain a reference to the GUID (immutable)
    #**********************************************************************
    def GetGuid(self):
        """Provide the GUID as a tuple of 4 integers
        """
        return self.__GUID_Words

    #**********************************************************************
    # Get the GUID in various formats
    #**********************************************************************
    def GetHexFormat(self):
        """Provide the GUID as a string of hex characters
        xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
        """
        return str(self)

    def GetPlainFormat(self):
        """Provide the GUID as a string plain format
        xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
        """
        s = str(self)
        return '-'.join((s[0:8], s[8:12], s[12:16], s[16:20], s[20:]))

    def GetRegFormat(self):
        """Provide the GUID as a string in MS Registry Format
        {xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx}
        """
        return '{' + self.GetPlainFormat() + '}'

    def Get8ChunkFormat(self):
        """Provide the GUID as a tuple of 11 strings: one of 8 hex
        digits, two of 4 hex digits, then 8 single octets of 2 hex
        digits each:
        xxxxxxxx xxxx xxxx xx xx xx xx xx xx xx xx
        """
        s = str(self)
        return (s[0:8], s[8:12], s[12:16]) + tuple(
            [s[pos:pos + 2] for pos in range(16, 32, 2)])

    def GetC8ChunkFormat(self):
        """Provide the GUID as the same 11 strings as Get8ChunkFormat()
        with each string prefixed with "0x"
        0xxxxxxxxx 0xxxxx 0xxxxx 0xxx 0xxx 0xxx 0xxx 0xxx 0xxx 0xxx 0xxx
        """
        return tuple(["0x" + chunk for chunk in self.Get8ChunkFormat()])

#######################################################################
# For scaffold testing...
#######################################################################
if __name__ == "__main__":

    def _show_all_formats(label, guid):
        """Print every external representation of guid, tagged with label."""
        print(label + " is " + guid.GetHexFormat())
        print(guid.GetGuid())
        print(guid.GetPlainFormat())
        print(guid.GetRegFormat())
        # Each chunk tuple goes on one line, space separated - the same
        # output the Python 2 "print c," loop followed by a bare print
        # produced, but valid on Python 3 as well.
        print(" ".join(guid.Get8ChunkFormat()))
        print(" ".join(guid.GetC8ChunkFormat()))

    tg1 = ka_GUID()
    _show_all_formats("tg1", tg1)

    tg2 = ka_GUID()
    _show_all_formats("tg2", tg2)

    # tg3 is built with the copy constructor from tg1.
    tg3 = ka_GUID(tg1)
    print("tg3 is " + tg3.GetHexFormat())
    print(tg3.GetGuid())

    if str(tg1) != tg1.GetHexFormat():
        print("Error, str() and GetHexFormat() return different values")

    # Tg2 should have a unique value
    # Tg1 and tg3 should have the same value
    if str(tg2) == str(tg1):
        print("Error: tg1 and tg2 are not different")

    if str(tg2) == str(tg3):
        print("Error: tg3 and tg2 are not different")

    if str(tg1) != str(tg3):
        print("Error: tg1 and tg3 are different")