Validate HTML5 in C# Validate HTML5 in C# selenium selenium

Validate HTML5 in C#


After spending an entire weekend on this problem, the only solution I can see is a commercial library called CSE HTML Validator.

It can be downloaded here: http://www.htmlvalidator.com/htmldownload.html

I wrote a simple wrapper for it. Here is the code:

using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;

[assembly: CLSCompliant(true)]

namespace HtmlValidator
{
    /// <summary>
    /// Thin wrapper around the CSE HTML Validator command-line processor.
    /// The HTML is written to a temporary file, the external validator is run
    /// against that file, and the JSON it prints to standard output is sorted
    /// into <see cref="Errors"/>, <see cref="Warnings"/> and <see cref="OtherMessages"/>.
    /// </summary>
    public class Validator
    {
        #region Constructors...

        /// <summary>
        /// Creates a validator for the given markup. Nothing is executed until
        /// <see cref="ValidateHtmlFile"/> is called.
        /// </summary>
        /// <param name="htmlToValidate">The HTML text to validate.</param>
        public Validator(string htmlToValidate)
        {
            HtmlToValidate = htmlToValidate;
            HasExecuted = false;
            Errors = new List<ValidationResult>();
            Warnings = new List<ValidationResult>();
            OtherMessages = new List<ValidationResult>();
        }

        #endregion

        #region Properties...

        /// <summary>Messages whose type is "ERROR".</summary>
        public IList<ValidationResult> Errors { get; private set; }

        /// <summary>True once <see cref="ValidateHtmlFile"/> has run to completion.</summary>
        public bool HasExecuted { get; private set; }

        /// <summary>The HTML text supplied to the constructor.</summary>
        public string HtmlToValidate { get; private set; }

        /// <summary>Messages whose type is neither "ERROR" nor "WARNING".</summary>
        public IList<ValidationResult> OtherMessages { get; private set; }

        /// <summary>The raw text the validator process wrote to standard output.</summary>
        public string ResultsString { get; private set; }

        /// <summary>
        /// Path of the temporary file used to hand the HTML to the validator.
        /// The file itself is deleted once the validator process has finished.
        /// </summary>
        public string TempFilePath { get; private set; }

        /// <summary>Messages whose type is "WARNING".</summary>
        public IList<ValidationResult> Warnings { get; private set; }

        #endregion

        #region Public methods...

        /// <summary>
        /// Writes the HTML to a temporary file, runs the external validator on it,
        /// removes the temporary file and parses the validator output.
        /// </summary>
        public void ValidateHtmlFile()
        {
            WriteTempFile();
            try
            {
                ExecuteValidator();
            }
            finally
            {
                // Clean up even when the validator cannot be started or fails,
                // so a failed run does not leave the HTML behind in the temp folder.
                DeleteTempFile();
            }

            ParseResults();
            HasExecuted = true;
        }

        #endregion

        #region Private methods...

        /// <summary>
        /// Deletes the temporary file created by <see cref="WriteTempFile"/>.
        /// </summary>
        private void DeleteTempFile()
        {
            // Delete the file that was actually written. Requesting a new name from
            // Path.GetTempFileName() here would create and remove a different file
            // and leak the one that holds the HTML.
            if (!String.IsNullOrEmpty(TempFilePath))
            {
                File.Delete(TempFilePath);
            }
        }

        /// <summary>
        /// Runs the command-line validator against <see cref="TempFilePath"/> and
        /// stores everything it writes to standard output in <see cref="ResultsString"/>.
        /// </summary>
        private void ExecuteValidator()
        {
            var psi = new ProcessStartInfo(GetHTMLValidatorPath())
            {
                RedirectStandardInput = false,
                RedirectStandardOutput = true,
                RedirectStandardError = false,
                UseShellExecute = false,
                // NOTE(review): the "-e,(stdout),0,16" switch is taken as-is from the
                // original wrapper; check its meaning against the validator's
                // command-line documentation before changing it.
                Arguments = String.Format(@"-e,(stdout),0,16 ""{0}""", TempFilePath)
            };

            using (var process = new Process { StartInfo = psi })
            {
                process.Start();

                // Drain stdout before waiting for exit: waiting first can deadlock
                // when the child fills the redirected pipe's buffer.
                ResultsString = process.StandardOutput.ReadToEnd();
                process.WaitForExit();
            }
        }

        /// <summary>
        /// Returns the hard-coded location of the validator's command-line executable.
        /// </summary>
        private static string GetHTMLValidatorPath()
        {
            return @"C:\Program Files (x86)\HTMLValidator120\cmdlineprocessor.exe";
        }

        /// <summary>
        /// Deserializes <see cref="ResultsString"/> as JSON, reads its "messages" array
        /// and files each entry under errors, warnings or other messages by its type.
        /// </summary>
        private void ParseResults()
        {
            var results = JsonConvert.DeserializeObject<dynamic>(ResultsString);
            IList<InternalValidationResult> messages = results.messages.ToObject<List<InternalValidationResult>>();

            foreach (InternalValidationResult internalValidationResult in messages)
            {
                ValidationResult result = new ValidationResult()
                {
                    Message = internalValidationResult.message,
                    LineNumber = internalValidationResult.linenumber,
                    MessageCategory = internalValidationResult.messagecategory,
                    MessageType = internalValidationResult.messagetype,
                    CharLocation = internalValidationResult.charlocation
                };

                switch (internalValidationResult.messagetype)
                {
                    case "ERROR":
                        Errors.Add(result);
                        break;
                    case "WARNING":
                        Warnings.Add(result);
                        break;
                    default:
                        OtherMessages.Add(result);
                        break;
                }
            }
        }

        /// <summary>
        /// Creates a new temporary file, records its path in <see cref="TempFilePath"/>
        /// and writes the HTML to it.
        /// </summary>
        private void WriteTempFile()
        {
            TempFilePath = Path.GetTempFileName();

            // The using block flushes and closes the writer even if the write throws.
            using (StreamWriter streamWriter = File.AppendText(TempFilePath))
            {
                streamWriter.WriteLine(HtmlToValidate);
            }
        }

        #endregion
    }
}

/// <summary>
/// A single message reported by the validator, as exposed to callers.
/// </summary>
public class ValidationResult
{
    /// <summary>Message type as reported by the validator, e.g. "ERROR" or "WARNING".</summary>
    public string MessageType { get; set; }

    /// <summary>Message category as reported by the validator.</summary>
    public string MessageCategory { get; set; }

    /// <summary>The message text.</summary>
    public string Message { get; set; }

    /// <summary>Line number reported for the message.</summary>
    public int LineNumber { get; set; }

    /// <summary>Character location reported for the message.</summary>
    public int CharLocation { get; set; }

    /// <summary>
    /// Formats the result as "TYPE Line N Char M:: message".
    /// </summary>
    public override string ToString()
    {
        return String.Format("{0} Line {1} Char {2}:: {3}", this.MessageType, this.LineNumber, this.CharLocation, this.Message);
    }
}

public class InternalValidationResult
{
    /*
     * DA: this class is used as an intermediate store of messages that come back from the underlying validator.
     * The properties must be cased as per the underlying Json object.
     * That is why the naming and spelling analysis warnings are suppressed on them.
     */
    #region Properties...

    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "charlocation"), System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "charlocation")]
    public int charlocation { get; set; }

    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "linenumber"), System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "linenumber")]
    public int linenumber { get; set; }

    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "message"), System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "message")]
    public string message { get; set; }

    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "messagecategory"), System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "messagecategory")]
    public string messagecategory { get; set; }

    [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1709:IdentifiersShouldBeCasedCorrectly", MessageId = "messagetype"), System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1704:IdentifiersShouldBeSpelledCorrectly", MessageId = "messagetype")]
    public string messagetype { get; set; }

    #endregion
}

Usage/Testing

   // Well-formed document: every element that is opened is also closed.
   private const string ValidHtml = "<!DOCType html><html><head></head><body><p>Hello World</p></body></html>";

   // Same document with the closing </html> tag left out.
   private const string BrokenHtml = "<!DOCType html><html><head></head><body><p>Hello World</p></body>";

   /// <summary>
   /// Validating the well-formed document must complete and report no errors.
   /// </summary>
   [TestMethod]
   public void CanValidHtmlStringReturnNoErrors()
   {
       Validator subject = new Validator(ValidHtml);

       subject.ValidateHtmlFile();

       Assert.IsTrue(subject.HasExecuted);
       // AreEqual reports the actual count on failure, unlike IsTrue(count == 0).
       Assert.AreEqual(0, subject.Errors.Count);
   }

   /// <summary>
   /// Validating the broken document must complete and report at least one error
   /// whose formatted text carries the "ERROR" message type.
   /// </summary>
   [TestMethod]
   public void CanInvalidHtmlStringReturnErrors()
   {
       Validator subject = new Validator(BrokenHtml);

       subject.ValidateHtmlFile();

       Assert.IsTrue(subject.HasExecuted);
       Assert.IsTrue(subject.Errors.Count > 0, "Expected at least one validation error.");
       // StringAssert prints the offending string when the substring is missing.
       StringAssert.Contains(subject.Errors[0].ToString(), "ERROR");
   }


It looks like this link may have what you want: Automated W3C Validation

You can download the markup validator mentioned in that question's accepted answer and pass your HTML to it. Sorry, it is not a .NET assembly :/, but you could wrap it in a DLL if you really wanted to.

Also, one of the answers on this question suggests that the W3C service actually exposes a RESTful API, but can return a SOAP response: How might I use the W3C Markup Validator API in my .NET application?


The best HTML5 validator, the Nu checker, is written in Java and is hard to interface with from .NET. But libtidy can be wrapped in a C++/CLI DLL that is callable from managed code. The sample program they've posted did a good job for me, with a little adapting.

LibTidy.h:

// Managed (C++/CLI) wrapper class that exposes HTML Tidy to .NET callers.
public ref class LibTidy
{
public:
    // Runs HTML Tidy over `input` and returns the diagnostics text that Tidy
    // collected in its error buffer (implemented in LibTidy.cpp).
    System::String^ __clrcall Test(System::String^ input);
};

LibTidy.cpp:

// Parses `input` with HTML Tidy and returns the diagnostics Tidy wrote to its
// error buffer as a managed string.
//
// input   - the markup to check.
// returns - the contents of Tidy's error buffer; an empty string when the
//           buffer was never filled.
System::String^ __clrcall LibTidy::Test(System::String^ input)
{
    // Convert the managed (UTF-16) string to a narrow C string for Tidy.
    // wcstombs_s converts using the current C locale; _TRUNCATE keeps the
    // result inside the buffer instead of failing when it does not fit.
    // NOTE(review): confirm the locale encoding matches Tidy's input encoding.
    CStringW cstring(input);
    const size_t newsizew = (cstring.GetLength() + 1) * 2;
    char* nstringw = new char[newsizew];
    size_t convertedCharsw = 0;
    wcstombs_s(&convertedCharsw, nstringw, newsizew, cstring, _TRUNCATE);

    TidyBuffer errbuf = { 0 };
    int rc = -1;
    Bool ok;

    TidyDoc tdoc = tidyCreate();                     // Initialize "document"

    ok = tidyOptSetBool(tdoc, TidyShowInfo, no);
    ok = tidyOptSetBool(tdoc, TidyQuiet, yes);
    ok = tidyOptSetBool(tdoc, TidyEmacs, yes);
    if (ok)
        rc = tidySetErrorBuffer(tdoc, &errbuf);      // Capture diagnostics
    if (rc >= 0)
        rc = tidyParseString(tdoc, nstringw);        // Parse the input
    if (rc >= 0)
        rc = tidyCleanAndRepair(tdoc);               // Tidy it up!
    if (rc >= 0)
        rc = tidyRunDiagnostics(tdoc);               // Kvetch

    // Copy the diagnostics into a managed string BEFORE the buffer is freed.
    // Reading errbuf.bp after tidyBufFree would be a use-after-free.
    System::String^ diagnostics = (errbuf.bp != NULL)
        ? gcnew System::String((char*)errbuf.bp)
        : System::String::Empty;

    if (errbuf.allocator != NULL)
        tidyBufFree(&errbuf);
    tidyRelease(tdoc);
    delete[] nstringw;                               // Release the narrow copy of the input

    return diagnostics;
}