#!/usr/bin/python
"""
Display python profile results on an easy-to-read web page.

bar technique from:
http://www.cssplay.co.uk/menu/barchart.html

todo:
hover on filenames for full path, or link to full path

"""
from __future__ import division
import sys, os
from optparse import OptionParser
from cPickle import dump, load
from nevow import flat, tags as T, json
import pstats
import hotshot.stats

def getStats(filename):
    """Load the profile dump stored in `filename` and return its stats.

    Only hotshot dumps are handled: the file is always passed to
    hotshot.stats.load. This is the place to add detection of plain
    pstats dumps and dispatch to the matching loader, which has not
    been needed so far.
    """
    loaded = hotshot.stats.load(filename)
    # loader for plain pstats dumps, currently unused:
    #return pstats.Stats(filename)
    return loaded

class Callee(object):
    """One profiled function: where it lives, its timings and its callers.

    Attributes set by the constructor:
      filename, linenum, function - taken from the `function` key
      primcalls - primitive (non-recursive) call count
      ncalls    - total call count
      tottime   - time spent in the function itself
      cumtime   - time spent in the function and everything it called
      tpercall  - tottime / ncalls (0 when ncalls is 0)
      cpercall  - cumtime / primcalls (0 when primcalls is 0)
      callers   - list of Caller objects, one per entry in the callers dict
      groups    - set of directory-based group names, see _getGroup
    """
    # how many trailing directory components are tried as group names
    groupThreshold = 3
    # stand-in used for both filename and group when filename is empty
    noFilename = "No filename"

    def __init__(self, function, stats, group=None):
        """Build a Callee from one (key, value) pair of a pstats stats dict.

        function: (filename, linenum, function name) tuple.
        stats: 5-tuple in the pstats layout
            (primitive calls, total calls, total time, cumulative time,
            callers dict mapping a caller's function tuple to its call
            info).
        group: unused; kept so existing callers keep working.
        """
        self.filename, self.linenum, self.function = function

        # The first field is the primitive call count, not a per-call
        # time; the per-call figures are derived below.
        (self.primcalls, self.ncalls, self.tottime,
         self.cumtime, callers) = stats

        if self.ncalls != 0:
            self.tpercall = (self.tottime / self.ncalls)
        else:
            self.tpercall = 0

        # same convention as the pstats report: cumulative time is
        # averaged over primitive calls only
        if self.primcalls != 0:
            self.cpercall = (self.cumtime / self.primcalls)
        else:
            self.cpercall = 0

        self.callers = []

        for caller, calls in callers.items():
            self.callers.append(Caller(caller, calls))

        self._getGroup()

    def _getGroup(self):
        """Fill self.groups with names built from the file's directory.

        For each x in 1..groupThreshold the last x directory components
        are taken, leading 'lib', 'python2.4', 'site-packages' and
        'build' components are dropped (always leaving at least one
        component), and the rest is joined with os.sep. A few
        uninformative names are then discarded. When the only group
        left is the empty string (the file has no directory part) the
        filename itself is used. An empty filename is replaced by
        noFilename, which also becomes the single group.
        """
        if not self.filename:
            self.filename = self.noFilename
            self.groups = set([self.noFilename])
            return

        # the directory parts do not change between iterations
        dirlist = os.path.dirname(self.filename).split(os.sep)

        self.groups = set()
        for x in range(1, self.groupThreshold + 1):
            tailParts = dirlist[-x:]  # slicing copies, so pop() is safe
            while (len(tailParts) > 1 and
                   tailParts[0] in ['lib', 'python2.4', 'site-packages',
                                    'build']):
                tailParts.pop(0)
            self.groups.add(os.sep.join(tailParts))

        self.groups.difference_update(set([
            'python', 'lib/python2.4', 'python2.4'
            ]))

        if self.groups == set(['']):
            self.groups = set([self.filename])


class Caller(Callee):
    """A function recorded as a caller of some Callee.

    Only the location, the `calls` value and the groups are stored;
    Callee.__init__ is deliberately not invoked, so none of the timing
    attributes exist on a Caller.
    """
    def __init__(self, caller, calls):
        """caller: (filename, linenum, function name) tuple.
        calls: the value stored for this caller in the callee's callers
        dict; kept as-is in self.calls.
        """
        (filename, linenum, function) = caller
        self.filename = filename
        self.linenum = linenum
        self.function = function

        self.calls = calls

        # inherited from Callee: derives self.groups from self.filename
        self._getGroup()

def maxWithoutOutlier(L):
    """Return the largest value of L after throwing out large outliers.

    An outlier is a value that's more than 5x the next smaller value.
    Outliers are dropped from the top until the largest remaining value
    is not an outlier or only two values are left.

    A value is never dropped when the next smaller value is zero (or
    negative): every positive number is "more than 5x" zero, so the
    comparison says nothing there and would otherwise collapse the
    result to 0.

    Returns 0 for an empty L. The input sequence is not modified.

    current version is not great on [1,1,1,1,1,2,7,8], where i think
    the answer is 2
    """
    L = sorted(L)
    if not L:
        return 0
    while len(L) > 2 and L[-2] > 0 and L[-1] > 5 * L[-2]:
        L.pop()
    return L[-1]

def bar(val, maxval):
    """Return a div tag whose inline width is val as a percentage of maxval.

    The percentage is capped at 100. When maxval is not positive the
    ratio is undefined, so the bar is drawn full for a positive val and
    empty otherwise instead of raising ZeroDivisionError.
    """
    if maxval > 0:
        percent = min(100, val / maxval * 100)
    elif val > 0:
        percent = 100
    else:
        percent = 0
    return T.div(style="width: %.1f%%" % percent)

def parseProfile(filename):
    """Load the profile in `filename` and return a list of Callee objects,
    one for every entry of the loaded stats dict."""
    profile = getStats(filename)
    return [Callee(key, record)
            for key, record in profile.stats.iteritems()]

def rtrimTo(s, maxLen):
    """Shorten s to at most maxLen characters, keeping its right-hand end.

    Strings that already fit are returned unchanged. Longer ones are
    cut on the left and prefixed with "..." so the result is exactly
    maxLen characters long. When maxLen leaves no room for anything
    besides the ellipsis (maxLen <= 3), only as many dots as fit are
    returned.
    """
    if len(s) <= maxLen:
        return s
    if maxLen <= 3:
        # s[-maxLen+3:] would start at a non-negative index here and
        # return (almost) the whole string instead of a short tail
        return "..."[:max(maxLen, 0)]
    return "..." + s[-(maxLen - 3):]

def _percentWidth(value, maxValue):
    """Return value as a percentage of maxValue, capped at 100, formatted
    like u"12.3%". A non-positive maxValue gives 100% for a positive
    value and 0% otherwise instead of dividing by zero."""
    if maxValue > 0:
        percent = min(100, value / maxValue * 100)
    elif value > 0:
        percent = 100
    else:
        percent = 0
    return u"%.1f%%" % percent


def formatJson(funcs):
    """Serialize funcs (Callee objects) to a JSON string of the form
    {"items": [...]} with one item per function.

    Each item carries the call counts and timings (times rounded to 3
    or 4 decimals), the group names, function name, filename, a display
    label whose filename part is trimmed to 40 characters, and the bar
    widths for tottime and cumtime relative to the outlier-trimmed
    maximum of the respective column.

    An empty funcs list yields an empty items list.
    """
    if funcs:
        maxTottime = maxWithoutOutlier([f.tottime for f in funcs])
        maxCumtime = maxWithoutOutlier([f.cumtime for f in funcs])
    else:
        # nothing to scale against; the loop below does not run either
        maxTottime = maxCumtime = 0

    items = []
    for func in funcs:
        item = {
            u'cpercall' : func.cpercall,
            u'ncalls' : func.ncalls,
            # round-trip through a string to cut the stored precision
            u'tottime' : float("%.3f" % func.tottime),
            u'cumtime' : float("%.3f" % func.cumtime),
            u'tpercall' : float("%.4f" % func.tpercall),

            u'group' : [unicode(group) for group in func.groups],
            u'function' : unicode(func.function),
            u'filename' : unicode(func.filename),
            u'label' : u'%s (%s:%s)' % (func.function,
                                        rtrimTo(func.filename, 40),
                                        func.linenum),
            u'tottimeWidth' : _percentWidth(func.tottime, maxTottime),
            u'cumtimeWidth' : _percentWidth(func.cumtime, maxCumtime),
            }

        items.append(item)

    return json.serialize({u"items" : items})

def groupFilename(filename):
    """Classify filename and return a (cssClass, shortName) pair.

    '<string>'                          -> ("stringCode", filename)
    below <stdlib>/site-packages/       -> ("site-packages", path relative
                                            to site-packages)
    elsewhere below <stdlib>/           -> ("stdlib", path relative to the
                                            stdlib dir; top-level modules
                                            get a "(stdlib) " prefix)
    anything else                       -> ("pkg-unknown", filename)

    <stdlib> is the directory containing the os module. The comparison
    is a plain string prefix test, so filename should be absolute to
    match the stdlib or site-packages cases.
    """
    if filename == '<string>':
        return "stringCode", filename

    stdlib = os.path.dirname(os.__file__) + os.sep
    # built with os.sep throughout so both prefix tests agree on every
    # platform
    sitePkg = stdlib + 'site-packages' + os.sep

    # site-packages lives inside the stdlib dir, so test it first
    if filename.startswith(sitePkg):
        return "site-packages", filename[len(sitePkg):]

    if filename.startswith(stdlib):
        relative = filename[len(stdlib):]
        if os.sep in relative:
            # stdlib package: the directory already tells where it is from
            return "stdlib", relative
        return "stdlib", "(stdlib) " + relative

    return "pkg-unknown", filename

def style(filename):
    """Return a CSS declaration with a light background colour derived
    from hash(filename), e.g. "background: #dcb".

    The last three decimal digits of the hash's absolute value pick the
    three colour channels; each digit d maps to the hex digit
    int(14 - d * 0.3), i.e. one of b, c, d, e. Hashes with fewer than
    three digits are zero-padded on the left.
    """
    # abs() and the zero padding guarantee exactly three decimal digits,
    # whatever the sign or size of the hash
    digits = "%03d" % (abs(hash(filename)) % 1000)
    # integer form of int(14 - d * 3 / 10); %x needs real integers
    channels = tuple([14 - (3 * int(d) + 9) // 10 for d in digits])
    return "background: #%01x%01x%01x" % channels

def annotateCssNames(funcs):
    """Attach display attributes to every func and return extra CSS.

    Attributes added to each func:

    cssClass - 'stdlib', 'site-packages', 'pkg-unknown', 'pkg-{dirname}'
               (or whatever else groupFilename returns for the suffix)
    shortName - for displaying
    suffix - internal use; the filename with a shared leading path cut off

    The returned string holds one 'td.pkg-{dirname}' rule per distinct
    pkg-{dirname} class, coloured by style().
    """
    # Pass 1: collect every leading path of two or more pieces from the
    # files that groupFilename cannot place.
    prefixCounts = {} # prefix : count
    for func in funcs:
        kind = groupFilename(os.path.abspath(func.filename))[0]
        if kind != 'pkg-unknown':
            continue
        pieces = func.filename.split(os.sep)
        # don't consider a single-piece prefix like 'foo/bar.py'
        for end in range(2, len(pieces)):
            lead = os.sep.join(pieces[:end])
            prefixCounts[lead] = prefixCounts.get(lead, 0) + 1

    # longest prefixes first, so the most specific one wins below
    longestFirst = sorted(prefixCounts, key=lambda p: -len(p))

    # Pass 2: shorten each filename and derive its css class.
    pkgClasses = set()
    for func in funcs:
        remainder = func.filename
        for lead in longestFirst:
            if not remainder.startswith(lead):
                continue
            # drop everything before the last component of the prefix,
            # so that component stays at the front of the suffix
            cut = lead[:-1].rfind(os.sep)
            parent = lead[:cut]
            remainder = remainder[len(parent) + 1:]
            break
        func.suffix = remainder

        kind, display = groupFilename(remainder)

        sepAt = remainder.find(os.sep)
        if kind == 'pkg-unknown' and sepAt != -1:
            # first directory of the suffix names the package class
            kind = 'pkg-%s' % remainder[:sepAt]
            pkgClasses.add(kind)

        func.cssClass = kind
        func.shortName = display

    return "".join(['td.%s { %s }\n' % (name, style(name))
                    for name in pkgClasses])

def formatHtml(funcs):
    """Render funcs (Callee objects) as a complete HTML page and return
    the flattened markup.

    The page is one table with a row per function (functions named
    'profiler' are left out). The tottime and cumtime cells contain a
    bar scaled against the outlier-trimmed maximum of their column, and
    the location cell is coloured through the css class assigned by
    annotateCssNames.
    """
    tottimeScale = maxWithoutOutlier([f.tottime for f in funcs])
    cumtimeScale = maxWithoutOutlier([f.cumtime for f in funcs])

    # also sets cssClass / shortName on every func
    extraCss = annotateCssNames(funcs)

    rows = []
    for f in funcs:
        if f.function == 'profiler':
            continue

        fileLink = T.a(href=f.filename)[f.shortName]
        location = [T.span(class_="fileLinenum")[fileLink,
                                                 ":%d" % f.linenum],
                    " %s" % f.function]

        ncallsCell = T.td[f.ncalls]
        tottimeCell = T.td[bar(f.tottime, tottimeScale)["%.3f" % f.tottime]]
        tpercallCell = T.td["%.3f" % (f.tpercall,)]
        cumtimeCell = T.td[bar(f.cumtime, cumtimeScale)["%.3f" % f.cumtime]]
        cpercallCell = T.td[f.cpercall]
        locationCell = T.td(class_=f.cssClass)[location]

        rows.append([T.tr[ncallsCell, tottimeCell, tpercallCell,
                          cumtimeCell, cpercallCell, locationCell],
                     '\n'])

    baseCss = '''
    table {
      width: 100%;

    }
    table, td, th {
      border: 1px solid black;
      border-collapse: collapse;
      padding: 2px;
      white-space: nowrap;
    }

    div {
      background: #72C8A1;
    }
    td.stdlib {
      background: #99CCFF;
    }
    td.site-packages {
      background: #E8C98B;
    }

    span.fileLinenum {
      color: gray;
      font-size: 90%;
    }
    span.fileLinenum a {
      color: #444;
    }
    '''

    stylesheet = T.style(type="text/css")[baseCss, extraCss]

    headerRow = T.tr[T.th['ncalls'],
                     T.th(width="30%")['tottime'],
                     T.th['tpercall'],
                     T.th(width="30%")['cumtime'],
                     T.th['cpercall'],
                     T.th['filename:linenum(function)']]

    page = T.html[
        T.head[stylesheet],
        T.body[T.table[headerRow, rows]]
        ]
    return flat.flatten(page)

def cacheFuncs(funcsGet, cachePath="/tmp/stat"):
    """while you're working on this program, it's nice to not reload
    the slow stats every time

    funcsGet: zero-argument callable producing the value to cache; only
        called when cachePath does not exist yet.
    cachePath: file holding the pickled value.

    Returns the freshly computed value on the first call and the
    unpickled cached value afterwards. The cache is never invalidated;
    delete the file to force a reload.
    """
    if not os.path.exists(cachePath):
        funcs = funcsGet()
        # pickles are byte streams, so use binary mode; try/finally
        # makes sure the handle is closed even if dump fails
        out = open(cachePath, "wb")
        try:
            dump(funcs, out)
        finally:
            out.close()
        return funcs

    # NOTE(security): unpickling runs code chosen by whoever wrote the
    # file, and the default path is a fixed name in a world-writable
    # directory. Only use this as a development aid on a trusted machine.
    cached = open(cachePath, "rb")
    try:
        return load(cached)
    finally:
        cached.close()

# Command-line entry point: load a profile, sort by total time, and write
# the requested JSON and/or HTML reports.
if __name__ == '__main__':
    parser = OptionParser("%prog pstats_file ... [options]")
    parser.add_option("-o", "--output", help="output plain html to this file")
    parser.add_option("-j", "--output-json",
                      help="output exhibit data to this file")
    parser.add_option("-m", "--max-rows", type="int",
                 help="max rows to show, when sorted by decreasing total time")
    opts, args = parser.parse_args()

    if not args:
        # prints the usage line and exits, instead of an IndexError below
        parser.error("no profile file given")

    # only the first file is read; further arguments are ignored
    if 0: # for quick startups while testing
        funcs = cacheFuncs(lambda: parseProfile(args[0]))
    else:
        funcs = parseProfile(args[0])

    funcs.sort(key=lambda f: f.tottime, reverse=True)
    if opts.max_rows:
        funcs = funcs[:opts.max_rows]

    # Each report is rendered before its file is opened, so a formatting
    # error does not leave a truncated output file behind.
    if opts.output_json:
        jsonText = formatJson(funcs)
        f = open(opts.output_json, 'w')
        try:
            f.write(jsonText)
        finally:
            f.close()
    if opts.output:
        htmlText = formatHtml(funcs)
        f = open(opts.output, 'w')
        try:
            f.write(htmlText)
        finally:
            f.close()

