Difference between revisions of "S2 CSS: cssexplorer.py"
From Dreamwidth Notes
Foxfirefey (Talk | contribs) (Created page with 'This is a quick little script that dumps out all HTML tags containing ids or divs. Requires [http://www.crummy.com/software/BeautifulSoup/ BeautifulSoup]. <source lang="python"...') |
Foxfirefey (Talk | contribs) |
||
(One intermediate revision by the same user not shown) | |||
Line 1: | Line 1: | ||
This is a quick little script that dumps out all HTML tags containing ids or divs. Requires [http://www.crummy.com/software/BeautifulSoup/ BeautifulSoup]. | This is a quick little script that dumps out all HTML tags containing ids or divs. Requires [http://www.crummy.com/software/BeautifulSoup/ BeautifulSoup]. | ||
+ | |||
+ | Note: on entry pages, it seems to die. Go and delete the one <script> section on the line it says it's erroring out on and it should work. | ||
<source lang="python">#!/usr/bin/python | <source lang="python">#!/usr/bin/python | ||
− | import sys, string | + | import sys, os, re, string |
from optparse import OptionParser | from optparse import OptionParser | ||
from BeautifulSoup import BeautifulSoup | from BeautifulSoup import BeautifulSoup | ||
+ | from sets import Set | ||
+ | |||
+ | tag_ids = list() | ||
+ | tag_classes = Set() | ||
def navigateClassesAndIDs(item, level): | def navigateClassesAndIDs(item, level): | ||
Line 28: | Line 34: | ||
if 'id' in item.attrs[0]: | if 'id' in item.attrs[0]: | ||
item_id = '#' + item['id'] | item_id = '#' + item['id'] | ||
+ | tag_ids.append(item_id) | ||
if 'class' in item.attrs[0]: | if 'class' in item.attrs[0]: | ||
item_classes = ' '.join(['.'+item_class for item_class in item['class'].split()]) | item_classes = ' '.join(['.'+item_class for item_class in item['class'].split()]) | ||
+ | [tag_classes.add(item_class) for item_class in item['class'].split()] | ||
if item_id == None and item_classes == None: | if item_id == None and item_classes == None: | ||
Line 49: | Line 57: | ||
parser.add_option("-i", "--infile", dest="infile", | parser.add_option("-i", "--infile", dest="infile", | ||
help="The input file", metavar="INFILE") | help="The input file", metavar="INFILE") | ||
− | + | ||
(options, args) = parser.parse_args() | (options, args) = parser.parse_args() | ||
Line 63: | Line 71: | ||
body = soup.find('body') | body = soup.find('body') | ||
− | navigateClassesAndIDs(body, 0)</source> | + | navigateClassesAndIDs(body, 0) |
+ | |||
+ | print "ALL IDS, in order: \n\t%s" % "\n\t".join(tag_ids) | ||
+ | print "ALL CLASSES, alphabetized: \n\t%s" % "\n\t".join(sorted(tag_classes))</source> | ||
[[Category: S2 CSS]] | [[Category: S2 CSS]] |
Latest revision as of 00:45, 27 April 2009
This is a quick little script that dumps out all HTML tags containing ids or classes. Requires BeautifulSoup.
Note: the script seems to die on entry pages. If it does, delete the <script> section on the line reported in the error message, and it should then work.
#!/usr/bin/python
"""Dump every tag inside a page's <body> that carries an id or a class.

Usage: cssexplorer.py -i INFILE

Each matching tag is printed with its nesting level, followed by a summary
of all ids (in document order) and all distinct classes (sorted).
Written for Python 2 and BeautifulSoup 3.
"""
import sys
import os
import re
import string
from optparse import OptionParser
from BeautifulSoup import BeautifulSoup
from sets import Set

# Every '#id' encountered, in document order (duplicates are kept).
tag_ids = list()
# Every distinct class name encountered (without the leading '.').
tag_classes = Set()


def navigateClassesAndIDs(item, level):
    """Walk `item`, its descendants and its following siblings, depth first.

    Each visited node is passed to printClassesAndIDs. Children are reported
    at `level + 1`; following siblings at `level`. A `None` item is a no-op.
    """
    # Siblings are walked with a loop rather than by recursion so that the
    # recursion depth tracks the nesting depth of the document, not the
    # number of nodes sitting next to each other.
    while item is not None:
        printClassesAndIDs(item, level)
        # Text nodes have no `contents`; only tags can be descended into.
        children = getattr(item, 'contents', None)
        if children:
            navigateClassesAndIDs(children[0], level + 1)
        # Compare against None (loop condition) instead of relying on
        # truthiness, so an empty text node does not cut the walk short.
        item = getattr(item, 'nextSibling', None)


def printClassesAndIDs(item, level):
    """Print one line for `item` if it is a tag with an id and/or class.

    Side effects: appends '#<id>' to `tag_ids` and adds each class name to
    `tag_classes`. Nodes without attributes (e.g. text nodes) and tags with
    neither an id nor a class are ignored.
    """
    if not getattr(item, 'attrs', None):
        return

    # Look the attributes up by name. Inspecting only the first attribute
    # tuple would miss an id/class that is not listed first on the tag.
    id_value = item.get('id')
    class_value = item.get('class')
    if id_value is None and class_value is None:
        return

    item_id = ""
    if id_value is not None:
        item_id = '#' + id_value
        tag_ids.append(item_id)

    item_classes = ""
    if class_value is not None:
        class_names = class_value.split()
        item_classes = ' '.join(['.' + name for name in class_names])
        tag_classes.update(class_names)

    tab = " " * level
    print("L %2d: %s<%s> %s %s" % (level, tab, item.name, item_id, item_classes))


def main():
    """Parse the command line, scan the input file and print the report."""
    parser = OptionParser()
    parser.add_option("-i", "--infile", dest="infile",
                      help="The input file", metavar="INFILE")
    (options, args) = parser.parse_args()

    if not options.infile:
        sys.stderr.write("Error: no input file to load!\n")
        parser.print_help()
        # This is an error path, so report failure to the calling shell.
        sys.exit(1)

    page = open(options.infile, 'r')
    try:
        # The constructor reads and parses the whole file, so the handle
        # can be released as soon as it returns.
        soup = BeautifulSoup(page)
    finally:
        page.close()

    body = soup.find('body')
    if body is None:
        sys.stderr.write("Error: no <body> element found in %s\n" % options.infile)
        sys.exit(1)

    navigateClassesAndIDs(body, 0)

    print("ALL IDS, in order: \n\t%s" % "\n\t".join(tag_ids))
    print("ALL CLASSES, alphabetized: \n\t%s" % "\n\t".join(sorted(tag_classes)))


if __name__ == '__main__':
    main()