# encoding: utf-8 
import urllib, re, os, sys, codecs
from BeautifulSoup import BeautifulSoup, NavigableString

def open(fn, *args):
    """Open ``fn`` as a utf-8 stream, announcing it on stderr first.

    Any extra positional arguments (such as the mode) are forwarded to
    ``codecs.open``.  This definition shadows the builtin ``open`` for the
    rest of the module.
    """
    print >> sys.stderr, '>>', fn
    options = dict(encoding='utf-8')
    return codecs.open(fn, *args, **options)

class f:
    """Context manager that redirects ``sys.stdout`` into the file ``fn``.

    Inside the ``with`` body everything printed goes to the file; on exit the
    previous ``sys.stdout`` is put back and the file is closed so its contents
    are flushed to disk before anything else reads it.
    """

    def __init__(self, fn):
        self.fn = fn

    def __enter__(self):
        # Open before touching sys.stdout so a failed open leaves it intact.
        out = open(self.fn, 'w')
        self.temp, self.out, sys.stdout = sys.stdout, out, out

    def __exit__(self, *args):
        # Restore stdout first: if close() fails, printing still works.
        sys.stdout = self.temp
        self.out.close()
        # Never suppress an exception raised inside the with-body.
        return False

def grab(path):
    """Fetch ``path`` from http://nomic.bob-space.com/ and return the body.

    Progress is logged to stderr: ``<<`` is printed before the request and
    the path is printed on the same line once the body has been read.

    Returns whatever ``read()`` on the ``urllib.urlopen`` response yields.
    Errors from opening or reading the URL propagate to the caller.
    """
    print >> sys.stderr, '<<',
    resp = urllib.urlopen('http://nomic.bob-space.com/' + path)
    try:
        z = resp.read()
    finally:
        # Release the connection even when the read fails part-way.
        resp.close()
    print >> sys.stderr, path
    return z

def write(path, text):
    """Write the utf-8 encoded byte string ``text`` to the file ``path``.

    ``text`` is decoded from utf-8 and written through ``open`` in ``'w'``
    mode, replacing any previous contents.

    Raises UnicodeDecodeError if ``text`` is not valid utf-8; in that case
    the existing file is left untouched.
    """
    # Decode before opening: open(..., 'w') truncates, so a bad byte sequence
    # must be detected while the old file contents are still intact.
    decoded = text.decode('utf-8')
    out = open(path, 'w')
    try:
        out.write(decoded)
    finally:
        # Close the file even if the write itself fails.
        out.close()

#def plaintext(x):
#    #return ''.join(i.strip() for i in x.fetchText(text=True))
#    return re.sub('\s*\n+\s*', '\n', ''.join(x.findAll(text=True)).strip())
def plaintext(x):
    """Flatten the parsed node ``x`` into plain text.

    * a ``NavigableString`` yields its text with surrounding whitespace
      stripped;
    * a ``br`` tag yields a newline;
    * an ``h1``..``h5`` tag yields a blank line, the flattened heading text,
      and a 40-dash underline;
    * any other tag yields the concatenation of its flattened children.
    """
    # Exact class comparison on purpose: only plain strings are treated as text.
    if x.__class__ == NavigableString:
        return x.strip()
    if x.name == 'br':
        return '\n'
    inner = ''.join([plaintext(child) for child in x.contents])
    if x.name in ('h1', 'h2', 'h3', 'h4', 'h5'):
        return '\n\n' + inner + '\n' + '-'*40 + '\n'
    return inner

def nround(x):
    """Return ``x`` unchanged (placeholder hook applied to column widths)."""
    # todo make this better
    return x

def tbl(t):
    if len(t.findAll('table')) > 0: return
    stuffs = []
    alen = None
    for tr in t.findAll('tr'):
        stuff = [plaintext(td) for td in tr.findAll('td')]
        if ''.join(stuff) == '': continue
        if alen is not None and alen != len(stuff): continue
        alen = len(stuff)
        stuffs.append(stuff)
    if len(stuffs) == 0: return
    sizes = [nround(max(len(stuff[col]) for stuff in stuffs)) for col in xrange(alen)]
    n = 0
    for stuff in stuffs:
        print '   '.join(stuff[col].ljust(sizes[col]) for col in xrange(alen))
        if n == 0: print '-' * (sum(sizes) + 3*(len(sizes) - 1))
        n += 1
    print

def doStuff(soup):
    els = soup.findAll(['table', 'h1', 'h2', 'h3', 'h4', 'h5', 'span'])
    for e in els:
        if e.name == 'table':
            tbl(e)
        elif e.name[0] == 'h':
            print e.renderContents()
            print
        else:
            try:
                if e['class'] == 'log':
                    print plaintext(e)
            except KeyError: pass

# --- Script body: refresh every report file under ./reports ---------------
# All relative paths below are resolved inside the 'reports' directory.
os.chdir('reports')
# Run `co -l` on every *.txt file before rewriting it
# (presumably an RCS locked checkout -- confirm against the repo setup).
os.system('find . -name "*.txt" -exec co -l {} \;')

# "agorareport" pages are saved verbatim via write(); "agoralog" pages are
# parsed and rendered as plain text by doStuff() while f() redirects stdout
# into the target file.
write('aaa.txt', grab('agorareport.aspx?contract=AAA'))
with f('accountor.txt'):
    doStuff(BeautifulSoup(grab('agoralog.aspx?contract=Accountor')))
write('alldecks.txt', grab('agorareport.aspx?contract=All%20Decks'))
with f('herald.txt'):
    doStuff(BeautifulSoup(grab('agoralog.aspx?contract=Herald')))
write('insulator.txt', grab('agorareport.aspx?contract=Insulator'))
write('scorekeepor.txt', grab('agorareport.aspx?contract=Scorekeepor'))
# The Notary log is fetched once and kept in `g`: it is rendered to
# notary.txt and also scanned for contract IDs below.
g = grab('agoralog.aspx?contract=Notary')
with f('notary.txt'):
    doStuff(BeautifulSoup(g))
# Unique contract IDs referenced anywhere in the Notary page.
cids = set(map(int, re.findall('contractID=([0-9]+)', g)))
for cid in cids:
    soup = BeautifulSoup(grab('viewcontract.aspx?contractID=%d' % cid))
    p = plaintext(soup)
    # The contract name is the text before the navigation marker.
    # NOTE(review): if a marker is missing, find() returns -1 and these
    # slices silently pick the wrong span -- confirm the page layout.
    name = p[:p.find('Home-Reports-Control Panel')]
    # Keep the body from 'Parties' up to the 'Home\nSystem Time' marker.
    p = p[p.find('Parties'):p.find('Home\nSystem Time')]
    # File name: lower-cased contract name with every non-alphanumeric
    # character replaced by '-'.
    filename = 'contracts/' + re.sub('[^a-zA-Z0-9]', '-', name.lower()) + '.txt'
    with f(filename):
        print name
        print
        print p

# recommend diff -b to avoid spurious diff results based on changing white space
# Run `ci -t-. -m. -u` on every *.txt file (presumably an RCS check-in that
# keeps an unlocked working copy -- confirm).
os.system('find . -name "*.txt" -exec ci -t-. -m. -u {} \;')

