#!/usr/bin/env python
'''
Read two shell files with specific formatting to parse out common
records and produce an HTML table.

It is used to extract the common records from a bash shell script
file and a tcsh shell script file. The idea is to create records in
the bash and tcsh files with the same id so that this script can
scoop them up and put them in a table, one row per id.

Records are surrounded by:

    # BEGIN: <id>
    ...
    # END

The id can be any string but it must be the same in both files.
Every line in between is printed. The "#@@ " token is stripped from
a line before it is printed, so it can be used to show text in the
table that the shell itself treats as a comment (a non-executing
record).

Here is a simple example:

    $ cat compare.sh
    # BEGIN: set a variable
    foo=bar
    # END

    $ cat compare.csh
    # BEGIN: set a variable
    set foo = bar
    # END

    $ python <this-script> compare.sh compare.csh > compare.html

NOTE(review): the copy of this file that was reviewed had lost its
HTML markup (tags and character entities inside string literals).
The tags, class names and stylesheet below are reconstructions --
confirm them against the upstream version before publishing.
'''
import sys
import os
import re


# Record delimiters recognised by parse(). Compiled once at import time
# because they are applied to every line of both input files.
_BEGIN_RE = re.compile(r'^# BEGIN: (.+)$')
_END_RE = re.compile(r'^# END')

# Character -> entity substitutions applied by htmlize(), in order.
# '&' must stay first so that the ampersands introduced by the later
# entities are not escaped a second time.
# NOTE(review): the entity spellings were reconstructed; the hyphen is
# mapped to an en dash because that is the character the damaged source
# showed as the replacement -- confirm this is intended.
_HTML_ENTITIES = (
    ('&', '&amp;'),
    ('"', '&quot;'),
    ("'", '&#39;'),
    ('<', '&lt;'),
    ('>', '&gt;'),
    (' ', '&nbsp;'),
    ('`', '&#96;'),
    ('-', '&ndash;'),
)


# ================================================================
# err
# ================================================================
def err(msg):
    '''
    Print an error message and exit with status 1.

    The message goes to stderr: stdout carries the generated HTML and
    is normally redirected to a file, where an error would be hidden.
    '''
    sys.stderr.write('ERROR: %s\n' % (msg))
    sys.exit(1)


# ================================================================
# parse
# ================================================================
def parse(fn):
    '''
    Parse a shell script file looking for specific format
    instructions in the comments.

    Returns a dictionary keyed by record id. Each value is a
    dictionary with the keys 'title' (the id), 'file' (fn), 'lineno'
    (1-based line of the BEGIN statement) and 'lines' (the
    right-stripped lines between BEGIN and END).

    Exits through err() on a duplicate id, a BEGIN inside an open
    record, an END with no open record, or a record that is still
    open at the end of the file.
    '''
    rows = {}
    row = None
    with open(fn, 'r') as fp:
        for n, line in enumerate(fp, 1):
            line = line.rstrip()

            # begin
            m = _BEGIN_RE.search(line)
            if m:
                if row is not None:
                    err('missing END statement around line: %d in %s'
                        % (n, fn))
                title = m.group(1)
                if title in rows:
                    err('duplicate entry "%s" at line %d in %s'
                        % (title, n, fn))
                row = {'title': title,
                       'file': fn,
                       'lineno': n,
                       'lines': [],
                       }
                continue

            # end
            if _END_RE.search(line):
                if row is None:
                    # A stray END used to reuse the previous title (or
                    # fail with a NameError); report it instead.
                    err('END statement without BEGIN at line %d in %s'
                        % (n, fn))
                rows[row['title']] = row
                row = None
                continue

            # between begin and end, everything else is ignored
            if row is not None:
                row['lines'].append(line)

    if row is not None:
        # The last record was never closed; dropping it silently would
        # surface later as a confusing record-count mismatch.
        err('missing END statement for "%s" (BEGIN at line %d) in %s'
            % (row['title'], row['lineno'], fn))
    return rows


# ================================================================
# htmlize
# ================================================================
def htmlize(line):
    '''
    HTMLize a string.

    Replaces the characters listed in _HTML_ENTITIES with character
    entities so that shell text is displayed literally, with its
    spacing preserved, inside a table cell.
    '''
    for char, entity in _HTML_ENTITIES:
        line = line.replace(char, entity)
    return line


# ================================================================
# myprint
# ================================================================
def myprint(m):
    '''
    Write a string to stdout without adding a newline.
    '''
    sys.stdout.write(str(m))


# ================================================================
# cell
# ================================================================
def cell(indent, rec):
    '''
    Write out a column cell.

    indent is the leading whitespace for the cell and rec is a record
    as returned by parse(). The "#@@ " token is removed from each line
    and the lines are joined with explicit line breaks.
    '''
    shown = [htmlize(line.replace('#@@ ', '')) for line in rec['lines']]
    myprint('%s<td>' % (indent))
    myprint('<br />'.join(shown))
    myprint('</td>\n')


# ================================================================
# gencss
# ================================================================
def gencss():
    '''
    Generate the CSS styling information specifically for wordpress.

    NOTE(review): the original stylesheet was not recoverable from the
    reviewed copy; this is a minimal replacement that styles the
    classes emitted by main().
    '''
    css = '''<style type="text/css">
table.shell-compare { border-collapse: collapse; }
table.shell-compare th,
table.shell-compare td { border: 1px solid #999999; padding: 4px 8px; vertical-align: top; }
table.shell-compare td { font-family: monospace; }
table.shell-compare tr.odd { background-color: #f0f0f0; }
</style>
'''
    return css


# ================================================================
# main
# ================================================================
def main():
    '''
    Main entry point.

    Expects exactly two arguments: the bash file and the tcsh file.
    Writes the stylesheet and the comparison table to stdout.
    '''
    # This is a simple script, no need for fancy argument handling,
    # but do not use assert: it is stripped when running with -O.
    if len(sys.argv) != 3:
        err('usage: %s <bash-file> <tcsh-file>' % (sys.argv[0]))

    bash_shell = sys.argv[1]
    tcsh_shell = sys.argv[2]

    bash_recs = parse(bash_shell)
    tcsh_recs = parse(tcsh_shell)

    # Equal counts plus "every bash id exists in tcsh" means the two
    # sets of ids are identical.
    if len(bash_recs) != len(tcsh_recs):
        err('Mismatched number of records: %d != %d'
            % (len(bash_recs), len(tcsh_recs)))
    for title in sorted(bash_recs):
        if title not in tcsh_recs:
            err('missing entry "%s"' % (title))

    myprint(gencss())
    myprint('<table class="shell-compare">\n'
            '  <thead>\n'
            '    <tr>\n'
            '      <th>ID</th>\n'
            '      <th>Operation</th>\n'
            '      <th>bash</th>\n'
            '      <th>tcsh</th>\n'
            '    </tr>\n'
            '  </thead>\n'
            '  <tbody>\n')

    tr_indent = '    '
    td_indent = tr_indent + '  '
    ordered = sorted(bash_recs, key=lambda x: x.lower())
    for i, title in enumerate(ordered):
        bash_rec = bash_recs[title]
        tcsh_rec = tcsh_recs[title]

        # Alternate the row class so the stylesheet can stripe rows.
        row_class = 'even' if (i % 2) == 0 else 'odd'
        # Escape only the markup-significant characters in the title;
        # htmlize() would also make it non-wrapping.
        safe_title = (title.replace('&', '&amp;')
                           .replace('<', '&lt;')
                           .replace('>', '&gt;'))

        myprint('%s<tr class="%s">\n' % (tr_indent, row_class))
        myprint('%s<td>%d</td>\n' % (td_indent, i + 1))
        myprint('%s<td>%s</td>\n' % (td_indent, safe_title))
        cell(td_indent, bash_rec)
        cell(td_indent, tcsh_rec)
        myprint('%s</tr>\n' % (tr_indent))

    myprint('  </tbody>\n'
            '</table>\n')


if __name__ == '__main__':
    main()