Tuesday, May 11, 2010

Enumerating all of the CSS Classes on a site

One of the projects that I’m working on needs to have an appendix listing all of the CSS classes used on the site, ideally with a cross reference for which page(s) each class occurs on. There is no utility built into VS that will extract this (and since some of the class attributes may originate in controls, it’s hard to conceive of such a VS utility).

My solution was simple:

  • Create a page that links to all of the pages in the site
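  • Create a console utility that walks the pages and then extracts all of the CSS class names found. This is actually very simple to do using the HtmlAgilityPack XPath query "//*[@class]", which selects every element that has a class attribute.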
  • I store the data in a data table and then save it as XML for subsequent use (for example, creating a table in an appendix)

Here’s the code – enhance as needed.

using System;
using System.Net;
using System.Data;
using System.Collections.Generic;
using HtmlAgilityPack;

namespace ScanForCssClasses
{
    /// <summary>
    /// Console utility that downloads a start page, follows every link on it that
    /// stays inside the start page's directory, and records which CSS class names
    /// occur on which page. The result is written to PageCss.xml in the current
    /// directory as a table with the columns "url" and "class".
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Expects the url of the page that links to all other pages
        /// as the first command line argument.
        /// </summary>
        /// <param name="args">Command line arguments; only args[0] is used.</param>
        static void Main(string[] args)
        {
            if (args.Length > 0)
            {
                GetRoot(args[0]);
            }
            else
            {
                Console.WriteLine("Please supply a url to start from");
            }
        }

        /// <summary>
        /// Scans every page linked from <paramref name="rooturl"/> and writes the
        /// (page, class name) pairs found to PageCss.xml.
        /// </summary>
        /// <param name="rooturl">Absolute url of the page that links to the pages to scan.</param>
        /// <remarks>
        /// A failure to download the start page is not handled here and propagates
        /// to the caller. A linked page that cannot be downloaded is reported on
        /// standard error and skipped, so one dead link does not lose the whole scan.
        /// </remarks>
        static void GetRoot(string rooturl)
        {
            Uri start;
            if (!Uri.TryCreate(rooturl, UriKind.Absolute, out start))
            {
                Console.WriteLine("Not a valid absolute url: " + rooturl);
                return;
            }

            // "." resolves to the directory that contains the start page, including
            // the trailing slash, so "/dir/" can never match "/dir2/...".
            string root = new Uri(start, ".").AbsoluteUri;

            DataTable results = new DataTable();
            results.Columns.Add("url");
            results.Columns.Add("class");

            using (WebClient wc = new WebClient())
            {
                HtmlDocument doc = new HtmlDocument();
                doc.LoadHtml(wc.DownloadString(start));

                List<string> pagelist = CollectPages(doc, start, root);

                // Pairs already written to the table, keyed as "relurl\nclass".
                // Class names are compared case-sensitively.
                HashSet<string> recorded = new HashSet<string>(StringComparer.Ordinal);

                foreach (string url in pagelist)
                {
                    try
                    {
                        doc.LoadHtml(wc.DownloadString(url));
                    }
                    catch (WebException ex)
                    {
                        Console.Error.WriteLine("Skipping {0}: {1}", url, ex.Message);
                        continue;
                    }

                    // Keep the leading slash, e.g. "/page.aspx".
                    string relurl = url.Substring(root.Length - 1);
                    AddClasses(doc, relurl, results, recorded);
                }
            }

            DataSet set = new DataSet();
            set.Tables.Add(results);
            set.WriteXml("PageCss.xml");
        }

        /// <summary>
        /// Returns the distinct pages linked from <paramref name="doc"/> that lie
        /// under <paramref name="root"/>, in the order they are first seen.
        /// </summary>
        /// <param name="doc">The parsed start page.</param>
        /// <param name="start">Url of the start page; relative links are resolved against it.</param>
        /// <param name="root">Absolute url prefix (ending in "/") a page must start with to be included.</param>
        private static List<string> CollectPages(HtmlDocument doc, Uri start, string root)
        {
            List<string> pages = new List<string>();

            // Urls are de-duplicated ignoring case, but fetched with their original casing.
            HashSet<string> seen = new HashSet<string>(StringComparer.OrdinalIgnoreCase);

            // SelectNodes returns null (not an empty collection) when nothing matches.
            HtmlNodeCollection links = doc.DocumentNode.SelectNodes("//a[@href]");
            if (links == null)
            {
                return pages;
            }

            foreach (HtmlNode link in links)
            {
                // Decode entities so that "a=1&amp;b=2" becomes a usable query string.
                string href = HtmlEntity.DeEntitize(link.Attributes["href"].Value).Trim();
                if (href.Length == 0 || href.StartsWith("#", StringComparison.Ordinal))
                {
                    continue;
                }

                // Resolves relative, root-relative and absolute hrefs alike.
                Uri target;
                if (!Uri.TryCreate(start, href, out target))
                {
                    continue;
                }

                // Drops mailto:, javascript:, other hosts and anything outside the root directory.
                if (!target.AbsoluteUri.StartsWith(root, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // Strip the fragment so "page#a" and "page#b" are downloaded only once.
                string page = target.GetLeftPart(UriPartial.Query);
                if (seen.Add(page))
                {
                    pages.Add(page);
                }
            }

            return pages;
        }

        /// <summary>
        /// Adds one row to <paramref name="results"/> for every class name used in
        /// <paramref name="doc"/> that has not been recorded for this page yet.
        /// </summary>
        /// <param name="doc">The parsed page.</param>
        /// <param name="relurl">Value stored in the "url" column for this page.</param>
        /// <param name="results">Table with the columns "url" and "class".</param>
        /// <param name="recorded">Pairs already present in <paramref name="results"/>.</param>
        private static void AddClasses(HtmlDocument doc, string relurl, DataTable results, HashSet<string> recorded)
        {
            // SelectNodes returns null when the page has no element with a class attribute.
            HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("//*[@class]");
            if (nodes == null)
            {
                return;
            }

            foreach (HtmlNode node in nodes)
            {
                // A class attribute holds a whitespace separated list of class names;
                // a null separator array makes Split break on any whitespace.
                string[] names = node.Attributes["class"].Value.Split(
                    (char[])null, StringSplitOptions.RemoveEmptyEntries);

                foreach (string css in names)
                {
                    // "\n" cannot occur in either part, so the key is unambiguous.
                    if (recorded.Add(relurl + "\n" + css))
                    {
                        results.Rows.Add(relurl, css);
                    }
                }
            }
        }
    }
}

2 comments: