Difference between revisions of "S2 CSS: cssexplorer.py"

From Dreamwidth Notes
Jump to: navigation, search
(Created page with 'This is a quick little script that dumps out all HTML tags containing ids or divs. Requires [http://www.crummy.com/software/BeautifulSoup/ BeautifulSoup]. <source lang="python"...')
 
 
(One intermediate revision by the same user not shown)
Line 1: Line 1:
 
This is a quick little script that dumps out all HTML tags containing ids or divs.  Requires [http://www.crummy.com/software/BeautifulSoup/ BeautifulSoup].
 
This is a quick little script that dumps out all HTML tags containing ids or divs.  Requires [http://www.crummy.com/software/BeautifulSoup/ BeautifulSoup].
 +
 +
Note: on entry pages, it seems to die.  Go and delete the one &lt;script&gt; section on the line it says it's erroring out on and it should work.
  
 
<source lang="python">#!/usr/bin/python
 
<source lang="python">#!/usr/bin/python
  
import sys, string
+
import sys, os, re, string
 
from optparse import OptionParser
 
from optparse import OptionParser
 
from BeautifulSoup import BeautifulSoup
 
from BeautifulSoup import BeautifulSoup
 +
from sets import Set
 +
 +
tag_ids = list()
 +
tag_classes = Set()
  
 
def navigateClassesAndIDs(item, level):
 
def navigateClassesAndIDs(item, level):
Line 28: Line 34:
 
     if 'id' in item.attrs[0]:
 
     if 'id' in item.attrs[0]:
 
         item_id = '#' + item['id']
 
         item_id = '#' + item['id']
 +
        tag_ids.append(item_id)
 
      
 
      
 
     if 'class' in item.attrs[0]:
 
     if 'class' in item.attrs[0]:
 
         item_classes = ' '.join(['.'+item_class for item_class in item['class'].split()])
 
         item_classes = ' '.join(['.'+item_class for item_class in item['class'].split()])
 +
        [tag_classes.add(item_class) for item_class in item['class'].split()]
 
          
 
          
 
     if item_id == None and item_classes == None:
 
     if item_id == None and item_classes == None:
Line 49: Line 57:
 
     parser.add_option("-i", "--infile", dest="infile",
 
     parser.add_option("-i", "--infile", dest="infile",
 
         help="The input file", metavar="INFILE")
 
         help="The input file", metavar="INFILE")
 
+
 
     (options, args) = parser.parse_args()
 
     (options, args) = parser.parse_args()
  
Line 63: Line 71:
 
     body = soup.find('body')   
 
     body = soup.find('body')   
 
      
 
      
     navigateClassesAndIDs(body, 0)</source>
+
     navigateClassesAndIDs(body, 0)
 +
   
 +
    print "ALL IDS, in order: \n\t%s" % "\n\t".join(tag_ids)
 +
    print "ALL CLASSES, alphabetized: \n\t%s" % "\n\t".join(sorted(tag_classes))</source>
  
 
[[Category: S2 CSS]]
 
[[Category: S2 CSS]]

Latest revision as of 00:45, 27 April 2009

This is a quick little script that dumps out all HTML tags that have an id or a class, followed by a list of every id (in document order) and every class (alphabetized) it found. Requires BeautifulSoup.

Note: on entry pages, the script seems to die. If that happens, delete the one <script> section on the line named in the error message and it should work.

#!/usr/bin/python
 
import sys, os, re, string
from optparse import OptionParser
from BeautifulSoup import BeautifulSoup
from sets import Set
 
# Every "#id" encountered, in document order (duplicates are kept).
# Filled in by printClassesAndIDs and printed at the end of the run.
tag_ids = []
# Every distinct class name encountered.  The builtin set replaces the
# deprecated sets.Set; it supports the same add()/update() calls and can be
# passed to sorted() in exactly the same way.
tag_classes = set()
 
def navigateClassesAndIDs(item, level):
    """Walk the parse tree depth-first and report every node.

    Starting at ``item``, each node is handed to printClassesAndIDs, then
    its children are visited one level deeper, then each following sibling
    is visited at the same ``level``.

    item  -- the node to start from.  ``None`` is accepted and is a no-op.
    level -- nesting depth of ``item``; passed on so output can be indented.

    Siblings are followed with a loop instead of a recursive call, so the
    recursion depth tracks how deeply tags are nested rather than how many
    nodes the page contains.
    """
    while item is not None:
        printClassesAndIDs(item, level)

        # Nodes without a non-empty 'contents' list are leaves.
        children = getattr(item, 'contents', None)
        if children:
            # The first child walks its own siblings, covering all children.
            navigateClassesAndIDs(children[0], level + 1)

        # The loop tests "is not None" rather than truthiness so that a
        # falsy node (such as an empty string) does not cut the walk short.
        item = item.nextSibling
 
def printClassesAndIDs(item, level):
    """Print one indented line for ``item`` if it carries an id and/or class.

    item  -- a parse-tree node.  Nodes with no ``attrs`` attribute, or with
             an empty one, are ignored.
    level -- nesting depth; controls the indentation of the printed line.

    Side effects: appends "#<id>" to the module-level ``tag_ids`` list and
    adds each class name to the module-level ``tag_classes`` set.

    Output format: ``L <level>: <indent><tagname> #id .class1 .class2``.
    Returns None.
    """
    # Nothing to report for nodes without attributes.
    if not getattr(item, 'attrs', None):
        return

    # Look the attributes up by name.  Testing membership in item.attrs[0]
    # only inspects the first (name, value) pair -- and its value as well --
    # so it misses ids/classes that are not the first attribute and raises
    # KeyError when the first attribute's value happens to be "id"/"class".
    raw_id = item.get('id')
    raw_classes = item.get('class')

    if raw_id is None and raw_classes is None:
        return

    item_id = ""
    if raw_id is not None:
        item_id = '#' + raw_id
        tag_ids.append(item_id)

    item_classes = ""
    if raw_classes is not None:
        # A class attribute may hold several whitespace-separated names.
        class_names = raw_classes.split()
        item_classes = ' '.join(['.' + name for name in class_names])
        tag_classes.update(class_names)

    tab = "    " * level

    # A single parenthesised expression prints identically whether print is
    # a statement or a function.
    print("L %2d: %s<%s> %s %s" % (level, tab, item.name, item_id, item_classes))
 
if __name__ == '__main__':
    # Command-line entry point: parse the HTML file named by -i/--infile,
    # print one line per tag that has an id or class, then print the
    # collected ids and classes.
    parser = OptionParser()
    parser.add_option("-i", "--infile", dest="infile",
        help="The input file", metavar="INFILE")

    (options, args) = parser.parse_args()

    if options.infile:
        InFile = options.infile
    else:
        sys.stderr.write("Error: no input file to load!\n")
        parser.print_help()
        # Exit non-zero so callers can tell that nothing was processed.
        sys.exit(1)

    # Close the file even if parsing raises; try/finally is used instead of
    # "with" to stay compatible with older Python 2 releases.
    page = open(InFile, 'r')
    try:
        soup = BeautifulSoup(page)
    finally:
        page.close()

    body = soup.find('body')
    if body is None:
        sys.stderr.write("Error: no <body> element found in %s\n" % InFile)
        sys.exit(1)

    navigateClassesAndIDs(body, 0)

    # A single parenthesised expression prints identically whether print is
    # a statement or a function.
    print("ALL IDS, in order: \n\t%s" % "\n\t".join(tag_ids))
    print("ALL CLASSES, alphabetized: \n\t%s" % "\n\t".join(sorted(tag_classes)))