#!/usr/bin/env python
# Author: Dave Hull
# License: Public Domain
# Version: 0.01
# Run this script from the command line as follows:
#
# python yahoo_msg_decoder.py
#
# You will be prompted to provide the archive owner's username and the
# archive filename. The script will decode (xor) the archive and dump
# the contents to the screen. Parsing stops cleanly at the end of the
# file; an archive that ends in the middle of a record produces a
# warning on stderr instead of a traceback.
#
# To Do:
# Get rid of the interactive mode and use command line args instead.
# Provide command line argument for the "other" conversant's username.
# Prevent output from going bold given certain byte strings.
# Provide output options for HTML.

import os
import re
import struct
import sys
from time import gmtime, strftime

# The code below is written to run unchanged on Python 2 and Python 3:
# print is always called with a single parenthesised string, and the
# interactive prompt is resolved once here.
try:
    _prompt = raw_input  # Python 2
except NameError:
    _prompt = input  # Python 3


def _readExact(filehandle, n):
    """Read exactly n bytes from filehandle.

    Raises EOFError if the file ends before n bytes are available, so
    callers can tell "end of archive" apart from malformed data.
    """
    data = filehandle.read(n)
    if len(data) != n:
        raise EOFError("expected %d byte(s), got %d" % (n, len(data)))
    return data


def getUserName():
    """Prompt until a syntactically valid Yahoo! Messenger ID is entered.

    An ID is accepted when it matches the pattern below (a letter, then
    word characters with at most one dot) and is 4 to 32 characters long.
    Returns the accepted username.
    """
    print('''Yahoo! Messenger archives are xor'd with the archive owner's username. You must know the archive owner's username to display the plain text.''')
    while True:
        username = _prompt('''Enter the Yahoo! Messenger user's username: ''')
        # Raw string: same pattern as before, without relying on Python
        # passing unknown escapes such as \w through a normal literal.
        m = re.match(r'^[A-Za-z]{1}\w*\.?\w*$', username)
        if m is not None and 3 < len(username) < 33:
            return username
        print('''That's not a valid Yahoo Messenger ID. Try again.''')


def getFileName():
    """Prompt for and return the path of the archive file."""
    # The prompt text (including its embedded line break) is unchanged.
    filename = _prompt('Enter the name of the Yahoo! \n'
                       'Messenger archive file, use the full path: ')
    return filename


def openFile(filename):
    """Open filename for binary reading and return the file object."""
    filehandle = open(filename, 'rb')
    return filehandle


def getHeader(filehandle):
    """Consume 5 bytes and return the fifth as a one-byte string.

    The first four bytes are read and discarded. Note that the return
    value is a string of length 0 or 1, never an integer, so comparing
    it with the number 0 is always False.
    """
    filehandle.read(4)
    tz = filehandle.read(1)
    return tz


def getMsgTime(filehandle):
    """Read a record timestamp and return it formatted as UTC text.

    Consumes the 4-byte packed time plus the single byte that follows
    it. Raises EOFError if fewer than 4 bytes remain.
    """
    packedTime = _readExact(filehandle, 4)  # time is always 4 bytes
    filehandle.read(1)  # UTC offset follows time; read and discarded
    # Byte order is pinned to little-endian. The original used the host's
    # native order, which gives the same result on little-endian machines.
    # NOTE(review): assumes the archive itself is little-endian, which is
    # consistent with the length and sent fields being read low byte
    # first below -- confirm against a real archive.
    unpackedTime = struct.unpack("<i", packedTime)[0]
    return strftime("%Y-%m-%d %H:%M:%S UTC", gmtime(unpackedTime))


def getMsg(filehandle, readlength, username):
    """Read readlength bytes and xor them with the repeating username.

    Byte i of the message is xor'd with character (i modulo
    len(username)) of the username. The single byte that follows the
    message is consumed as well.

    Returns the decoded text. Raises EOFError if the file ends before
    readlength bytes are read, and ValueError if username is empty while
    there are bytes to decode (the original looped forever in that case).
    """
    cipher = bytearray(_readExact(filehandle, readlength))
    key = [ord(c) for c in username]
    if cipher and not key:
        raise ValueError("username must not be empty")
    keyLen = len(key)
    msg = ''.join(chr(b ^ key[i % keyLen]) for i, b in enumerate(cipher))
    filehandle.read(1)  # there's a null byte after the message
    return msg


def getReservedBits(filehandle):
    """Skip the 3 reserved bytes that follow each one-byte field."""
    filehandle.read(3)  # reserved bits are always in 3s


def getNBits(filehandle, n):
    """Skip n bytes."""
    filehandle.read(n)


def getSent(filehandle):
    """Read and return the one-byte sent flag.

    Raises EOFError if the file has no more data.
    """
    return _readExact(filehandle, 1)


def getMsgLength(filehandle):
    """Read the one-byte message length and return it as an integer.

    Raises EOFError if the file has no more data.
    """
    # NOTE(review): only one byte is used as the length and the next
    # three are skipped as reserved by the caller; if the field is really
    # a 32-bit value, messages over 255 bytes would be misread -- confirm.
    return ord(_readExact(filehandle, 1))


def iterMessages(filehandle, username):
    """Yield (msgTime, fromOther, msg) for each record in the archive.

    msgTime is the formatted timestamp, fromOther is True when the sent
    flag byte is non-zero, and msg is the decoded text. Iteration ends
    when a full 4-byte timestamp can no longer be read. EOFError is
    raised if the file ends later inside a record.
    """
    while True:
        try:
            msgTime = getMsgTime(filehandle)
        except EOFError:
            return  # no further record starts here: end of archive
        getReservedBits(filehandle)
        fromOther = bool(ord(getSent(filehandle)))
        getReservedBits(filehandle)
        msgLen = getMsgLength(filehandle)
        getReservedBits(filehandle)
        msg = getMsg(filehandle, msgLen, username)
        getReservedBits(filehandle)
        yield msgTime, fromOther, msg


def main():
    """Prompt for the username and archive, then print every message."""
    username = getUserName()
    filename = getFileName()
    filehandle = openFile(filename)
    try:
        # Records are parsed from offset 0.
        # NOTE(review): the original tested getHeader(filehandle) == 0 to
        # decide whether to skip a header, but that compared a one-byte
        # string with an integer and was never true, so it always rewound
        # to offset 0. That effective behaviour is kept here; verify the
        # header layout against real archives before enabling any skip.
        for msgTime, fromOther, msg in iterMessages(filehandle, username):
            speaker = "other" if fromOther else username
            print(speaker + " (" + msgTime + "): " + msg)
    except EOFError:
        sys.stderr.write("Archive ends in the middle of a record; "
                         "stopping.\n")
    finally:
        # Always release the file, including on errors.
        filehandle.close()


if __name__ == "__main__":
    main()