#!/usr/bin/python

'''

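Summarise Mercurial history per author, per e-mail domain and per week.

Usage (run inside the repository; needs `hg` and `diffstat` on PATH):

    hg history -v | hgstat.py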

'''

import sys, string, re, os, rfc822, time

# State describing the changeset currently being read from stdin.  The
# input loop overwrites these each time it meets a "changeset:" header.
cset = 'n'
parents = 0
merge = ''
first_email, found_first_email = '', 0
first_signed_off, found_first_signed_off = '', 0

# Accumulated results.
#   stats  : e-mail address -> (aliases, cins, m1, m2)
#   stats2 : e-mail domain  -> (aliases, cins, m1, m2)
stats = dict()
stats2 = dict()

# Counters for non-merge ("real") changesets and for everything else.
t_real = 0
t_merges = 0

# Known address aliases.  Each inner list describes one person: element 0
# is the primary address, and the statistics of every following address
# are folded into it before the reports are printed.
alias_list = [
    ['steven@xensource.com', 'steve@xensource.com'],
    [
        'steven.smith@xensource.com',
        'sos22@cam.ac.uk',
        'sos22@cam.ac.uk.',
        'sos22@cl.cam.ac.uk',
        'sos22@cl.cam.ac.uk.',
        'steven.smith@cl.cam.ac.uk',
        'ssmith@xensource.com',
    ],
    ['sean.dague@ibm.com', 'sean@dague.net'],
    ['rusty.russel@ibm.com', 'rusty@rustcorp.com.au'],
    ['jyoung5@us.ibm.com', 'jerone@gmail.com'],
    ['gerd.knorr@novell.com', 'kraxel@suse.de', 'kraxel@bytesex.org'],
    [
        'christian@xensource.com',
        'christian.limpach@cl.cam.ac.uk',
        'c@pin.lu',
        'cl349@cl.cam.ac.uk',
        'limpach@cl.cam.ac.uk',
        'cl@netbsd.org',
        'christian.limpach@xensource.com',
    ],
    [
        'ian@xensource.com',
        'ian@xensoure.com',
        'ian.pratt@cl.cam.ac.uk',
        'iab@xensource.com',
    ],
    ['kmacy@fsmware.com', 'kmacy@netapp.com'],
    ['keir@xensource.com', 'keir.fraser@cl.cam.ac.uk'],
    ['kurt.garloff@novell.com', 'garloff@suse.de'],
    ['mark.williamson@xensource.com', 'mark.williamson@cl.cam.ac.uk'],
    [
        'andrew.warfield@xensource.com',
        'andrew.warfield@cl.cam.ac.uk',
        'akw27@cl.cam.ac.uk',
    ],
    ['xin.b.li@intel.com', 'xin.bi.li@intel.com', 'xin..b.li@intel.com'],
    ['vincent@xensource.com', 'vincent@snarc.org'],
    ['bthomas@virtualiron.com', 'bjthomas3@gmail.com'],
    ['herbert@redhat.com', 'herbert@gondor.apana.org.au'],
    ['gdunlap@xensource.com', 'dunlapg@umich.edu'],
    ['leendert@us.ibm.com', 'leendert@watson.ibm.com'],
    ['muli@il.ibm.com', 'mulix@mulix.org'],
    ['ian.campbell@xensource.com', 'ian.campbell@xesource.com'],
    ['hollisb@us.ibm.com', 'hollis@us.ibm.com'],
]
	    

def diffstat(cset, command='/bin/bash -c "hg export %s | diffstat"'):
    """Return (files, insertions, deletions) for one changeset.

    ``command % cset`` is run through the shell and its output is scanned
    for diffstat's summary line (" N files changed, ...").  If several
    summary lines appear the last one wins; if none appears the result is
    (0, 0, 0).

    cset    -- changeset identifier substituted into ``command``.  It is
               interpolated into a shell command line without quoting, so
               it must come from a trusted source.
    command -- shell command template containing one ``%s``; the default
               is the pipeline this script has always used.
    """
    files = ins = dels = 0
    fd = os.popen(command % cset)
    try:
        for line in fd:
            # diffstat writes "1 file changed" (singular) for single-file
            # changesets, so the plural 's' has to be optional.
            summary = re.match(r'^ (\d+) files? changed,', line)
            if not summary:
                continue
            files = summary.group(1)
            # Either count may be missing from the summary line.
            ins = dels = 0
            found = re.match(r'.*?(\d+) insertion.*', line)
            if found:
                ins = found.group(1)
            found = re.match(r'.*?(\d+) deletion.*', line)
            if found:
                dels = found.group(1)
    finally:
        # Always release the pipe, even if parsing raised.
        fd.close()
    return (int(files), int(ins), int(dels))

def diffstat2(cset, command=r'/bin/bash -c "hg export %s | diffstat -p1 -f0 | grep \|"'):
    """Return (files, insertions, deletions) summed over per-file lines.

    ``command % cset`` is run through the shell.  Every output line that
    contains '|' counts as one file; the text after the first '|' is split
    on whitespace and fields 1 and 3 are added to the insertion and
    deletion totals.  The totals are also printed on one line.

    cset    -- changeset identifier substituted into ``command``.  It is
               interpolated into a shell command line without quoting, so
               it must come from a trusted source.
    command -- shell command template containing one ``%s``.  Pass a
               different pipeline to restrict the count, e.g. append
               ``| grep tools/libxc`` or ``| grep -v ia64`` stages inside
               the bash -c string.
    """
    files = ins = dels = 0
    fd = os.popen(command % cset)
    try:
        for line in fd:
            if '|' not in line:
                # Not a per-file line; only reachable with a custom command
                # because the default pipeline greps for '|'.
                continue
            files = files + 1
            columns = line.split('|')[1].split()
            try:
                added, removed = int(columns[1]), int(columns[3])
            except (IndexError, ValueError):
                # The file is still counted, but a line without numeric
                # stat columns contributes no line totals instead of
                # aborting the whole run.
                continue
            ins = ins + added
            dels = dels + removed
    finally:
        # Always release the pipe, even if parsing raised.
        fd.close()
    print('%d %d %d' % (files, ins, dels))
    return (files, ins, dels)

#diffstat2(5371)    
#diffstat2(5370)    

# ---------------------------------------------------------------------------
# Input loop: read "hg history -v" output from stdin line by line.
#
# A "changeset:" header (or end of input) closes the changeset collected so
# far: it is tallied as a real checkin or as a merge, and real checkins are
# measured with diffstat2() and credited to their first Signed-off-by
# address.  The remaining lines update the state of the current changeset.
# ---------------------------------------------------------------------------

eof = 0

# Checkins are bucketed by date_step seconds (one week); 'date' holds the
# bucket index of the changeset being read.
date_step = 24*3600*7
date = 0
date_cset = {}      # bucket index -> (real checkins, merges)

# Becomes true at the first "changeset:" header.  Without it the first
# header (or an empty input) would tally a changeset that was never read,
# adding a phantom merge to t_merges and to bucket 0.
seen_changeset = 0

while not eof:
    line = sys.stdin.readline()
    if len(line) == 0:
        eof = 1

    r = re.match(r'^changeset:   (\d+):([0-9a-f]+)$', line)
    if eof or r:

        if seen_changeset:
            (real, merges) = date_cset.get(date, (0, 0))

            if cset and parents < 2 and merge == 'n':
                t_real = t_real + 1
                date_cset[date] = (real + 1, merges)

                (files, ins, dels) = diffstat2(cset)
                m1 = ins                    # lines inserted
                m2 = max(ins - dels, 0)     # net growth, never negative

                # print stats from the last changeset
                print('cset:%s Merge:%s%s %s:%s:%s %d:%d Signed:%s Author:%s' % (cset, merge, parents > 1 and 'y' or 'n', files, ins, dels, m1, m2, first_signed_off, first_email))

                author = first_signed_off
                if author == '':
                    author = 'unknown@unknown.com'
                # The author gets an entry even when nothing was inserted,
                # but only changesets that insert lines are counted.
                (aliases, cins, tm1, tm2) = stats.setdefault(author, (0, 0, 0, 0))
                if m1 > 0:
                    stats[author] = (aliases, cins + 1, tm1 + m1, tm2 + m2)

            else:
                t_merges = t_merges + 1
                date_cset[date] = (real, merges + 1)

        if not eof:
            # Start collecting the changeset announced by this header.
            cset = r.group(1)
            cset_long = r.group(2)
            found_first_email = 0
            first_email = ''
            found_first_signed_off = 0
            first_signed_off = ''
            merge = 'n'
            parents = 0
            seen_changeset = 1

        continue

    r = re.match(r'^user:        (.*)$', line)
    if r:
        user = r.group(1)
        continue

    r = re.match(r'^parent:      (.*)$', line)
    if r:
        parents = parents + 1
        continue

    r = re.match(r'^date:      (.*)$', line)
    if r:
        # NOTE: rfc822.parsedate() drops the UTC offset and time.mktime()
        # reads the fields as local time, so bucket edges follow the local
        # timezone of the machine running the script.
        date = int(time.mktime(rfc822.parsedate(r.group(1))) / date_step)
        continue

    # Everything else is matched case-insensitively.
    line = line.lower()

    # A line starting with "merge" or "manual" marks the changeset as a merge.
    if line.startswith('merge') or line.startswith('manual'):
        merge = 'y'

    if not found_first_email:
        r = re.match(r'.*?([a-z0-9._+]+@[a-z0-9._+]+).*', line)
        if r:
            first_email = r.group(1).strip()
            found_first_email = 1

    if not found_first_signed_off:
        r = re.match(r'^signed-off-by:.*?([a-z0-9._+]+@[a-z0-9._+]+).*', line)
        if r:
            first_signed_off = r.group(1).strip()
            found_first_signed_off = 1


# Report 1: overall counters, then the raw per-address totals in sorted
# order, before any aliases have been folded together.
print('############################################################################')
print('t_real=%d t_merges=%d' % (t_real, t_merges))

for address in sorted(stats.keys()):
    (aliases, cins, m1, m2) = stats[address]
    print('%s cins=%d m1=%d m2=%d' % (address, cins, m1, m2))

print('############################################################################')


# Fold the totals of every alias address into the primary address of its
# group (element 0 of each alias_list entry) and drop the alias entries.
for user in alias_list:
    primary = user[0]
    (aliases, cins, m1, m2) = stats.get(primary, (0, 0, 0, 0))
    for a in user[1:]:
        if a in stats:
            # Unpack into separate names: reusing 'aliases' here would
            # reset the running count on every alias found.
            (xaliases, xcins, xm1, xm2) = stats.pop(a)
            (aliases, cins, m1, m2) = (aliases + 1, cins + xcins, m1 + xm1, m2 + xm2)
    # Only store a result when the group has at least one counted checkin.
    if cins > 0:
        stats[primary] = (aliases, cins, m1, m2)

# Report 2: per-address totals after alias folding, sorted by address.
print('############################################################################')

for address in sorted(stats.keys()):
    (aliases, cins, m1, m2) = stats[address]
    print('% 16s  aliases=% 4d  cins=% 4d  ins=% 6d  ext=% 6d' % (address, aliases, cins, m1, m2))
    # Clear the alias count once printed; the per-domain pass that
    # follows adds this field into its own totals.
    stats[address] = (0, cins, m1, m2)

print('############################################################################')

# Matches the last two dot-separated labels of an address, e.g.
# "someone@cl.cam.ac.uk" -> "ac.uk", "someone@xensource.com" -> "xensource.com".
_DOMAIN_RE = re.compile(r'.*[@.]([^.]+[.][^.]+)')


def _domain_of(address):
    """Return the grouping domain for *address*.

    Addresses the pattern cannot split (no dotted domain) are grouped
    under whatever follows the last '@' instead of being credited to an
    unrelated domain.
    """
    found = _DOMAIN_RE.match(address)
    if found:
        return found.group(1)
    return address.split('@')[-1]


# Aggregate the per-address totals into per-domain totals (stats2).  The
# 'aliases' field of a domain ends up counting its addresses.
# Sorted so that the report order is reproducible.
keys = sorted(stats.keys())
for key in keys:
    primary = _domain_of(key)
    (aliases, cins, m1, m2) = stats2.get(primary, (0, 0, 0, 0))
    (xaliases, xcins, xm1, xm2) = stats[key]
    stats2[primary] = (aliases + xaliases + 1, cins + xcins, m1 + xm1, m2 + xm2)

print('############################################################################')

print('total checkins= %d' % (t_real))

keys2 = sorted(stats2.keys())

# Report 3: one paragraph per domain, listing the addresses grouped under it.
for k in keys2:
    (aliases, cins, m1, m2) = stats2[k]
    print('\n%- 16s  aliases=% 4d  cins=% 4d  ins=% 7d  ext=% 7d' % (k, aliases, cins, m1, m2))
    for j in keys:
        if _domain_of(j) == k:
            (aliases, cins, m1, m2) = stats[j]
            print('\t%- 22s  cins=% 4d  ins=% 7d  ext=% 7d' % (j.split('@')[0], cins, m1, m2))

print('############################################################################')


# Report 4: the same per-domain totals as a compact table, one line each.
for k in keys2:
    (aliases, cins, m1, m2) = stats2[k]
    print('%- 16s  aliases=% 4d  cins=% 4d  ins=% 7d  ext=% 7d' % (k, aliases, cins, m1, m2))

# Report 5: real checkins and merges per date_step-sized bucket, oldest
# first.  Each row is labelled with the UTC date at which its bucket starts.
print('############################################################################')


print('checkins over time, real and  merges')

for bucket in sorted(date_cset.keys()):
    (year, month, day) = time.gmtime(bucket * date_step)[:3]
    (real, merges) = date_cset[bucket]
    print('%d/%d/%d\t%d\t%d' % (year, month, day, real, merges))

print('')





	    






