Generate and validate a Google XML sitemap with ASP.NET

Looking to dynamically create a Google-friendly sitemap for your site from a list of URLs fetched from somewhere (a database, for instance)?

The sample below uses an ASP.NET WebForms site (it’s what I’m currently using at work), but it can easily be adapted for an MVC site.

Here for the source code.

Scope

Serve a valid sitemap when yoursite.com/sitemap.xml is requested.

  1. The sitemap is generated on the fly from a list of URLs.
  2. The sitemap must be valid according to its XSD schema.

Code

The SitemapGenerator calls the GetSiteUris method of an injected repository and renders the returned URLs either as an XML sitemap or as plain text.

/// <summary>
/// Contract for building a sitemap from a list of site URIs and rendering it
/// either as XML or as plain text.
/// </summary>
public interface ISitemapGenerator
{
/// <summary>
/// Prepares the URI list that the rendering methods will use.
/// In the SitemapGenerator implementation this fetches the URIs from the injected repository.
/// </summary>
/// <returns>A generator on which a rendering method can be chained (SitemapGenerator returns itself).</returns>
ISitemapGenerator Generate();
/// <summary>
/// Renders the prepared URIs as an XML sitemap.
/// In the SitemapGenerator implementation this is a sitemaps.org 0.9 urlset with one url/loc element per URI.
/// </summary>
/// <returns>The sitemap XML as a string.</returns>
string AsXmlSitemap();
/// <summary>
/// Renders the prepared URIs as plain text.
/// In the SitemapGenerator implementation each URI is written on its own line.
/// </summary>
/// <returns>The URI list as a string.</returns>
string AsPlainText();
}
/// <summary>
/// Builds a sitemap from the URIs supplied by an <see cref="IUriRepository"/> and renders it
/// as a sitemaps.org 0.9 XML document or as plain text (one URI per line).
/// </summary>
/// <remarks>
/// Call <see cref="Generate"/> first to load the URIs, then one of the rendering methods:
/// <c>generator.Generate().AsXmlSitemap()</c>.
/// </remarks>
public class SitemapGenerator : ISitemapGenerator
{
    /// <summary>Namespace every element of the sitemap must belong to.</summary>
    private static readonly XNamespace SitemapNamespace = "http://www.sitemaps.org/schemas/sitemap/0.9";

    private readonly IUriRepository _repository;

    // Stays null until Generate() has run; the rendering methods check for that.
    private List<string> _uris;

    /// <summary>
    /// Creates a generator that reads its URIs from <paramref name="repository"/>.
    /// </summary>
    /// <param name="repository">Source of the site URIs; must not be null.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="repository"/> is null.</exception>
    public SitemapGenerator(IUriRepository repository)
    {
        if (repository == null)
        {
            throw new System.ArgumentNullException("repository");
        }

        _repository = repository;
    }

    /// <summary>
    /// Loads the site URIs from the repository, replacing any previously loaded list.
    /// </summary>
    /// <returns>This instance, so a rendering call can be chained.</returns>
    public ISitemapGenerator Generate()
    {
        // A repository returning null is treated as "no URIs" rather than
        // surfacing later as a NullReferenceException in the rendering methods.
        _uris = _repository.GetSiteUris() ?? new List<string>();

        return this;
    }

    /// <summary>
    /// Renders the loaded URIs as a sitemap: a <c>urlset</c> root with one <c>url/loc</c> element per URI.
    /// </summary>
    /// <returns>The XML document as a string, including the XML declaration.</returns>
    /// <exception cref="System.InvalidOperationException"><see cref="Generate"/> has not been called.</exception>
    public string AsXmlSitemap()
    {
        EnsureGenerated();

        var xmlDoc = new XDocument(
            new XDeclaration("1.0", "UTF-8", null),
            new XElement(SitemapNamespace + "urlset",
                from url in _uris
                select new XElement(SitemapNamespace + "url",
                    new XElement(SitemapNamespace + "loc", url))));

        // XDocument.ToString() never includes the XML declaration, so it is
        // written explicitly in front of the serialized document.
        return xmlDoc.Declaration.ToString() + System.Environment.NewLine + xmlDoc.ToString();
    }

    /// <summary>
    /// Renders the loaded URIs as plain text, one URI per line (each line is terminated).
    /// </summary>
    /// <returns>The URI list as a string; empty when no URIs were loaded.</returns>
    /// <exception cref="System.InvalidOperationException"><see cref="Generate"/> has not been called.</exception>
    public string AsPlainText()
    {
        EnsureGenerated();

        var stringBuilder = new StringBuilder();
        foreach (var uri in _uris)
        {
            stringBuilder.AppendLine(uri);
        }

        return stringBuilder.ToString();
    }

    /// <summary>Fails with a descriptive error when the URIs have not been loaded yet.</summary>
    private void EnsureGenerated()
    {
        if (_uris == null)
        {
            throw new System.InvalidOperationException("Generate() must be called before rendering the sitemap.");
        }
    }
}

We can add the optional priority and changefreq elements by extending the LINQ to XML select:

select
new XElement(xmlNS + "url",
new XElement(xmlNS + "loc", url),
new XElement(xmlNS + "priority", "1"),....

The sitemap can now be served. We use an HTTP handler rather than a page to skip the unnecessary page lifecycle.

/// <summary>
/// HTTP handler that writes the generated XML sitemap to the response.
/// </summary>
public class MySitemap : IHttpHandler
{
    private readonly ISitemapGenerator _sitemapGenerator;

    /// <summary>
    /// Resolves the <see cref="ISitemapGenerator"/> from the application-level container
    /// exposed by the current application instance.
    /// </summary>
    public MySitemap()
    {
        // The application instance is expected to implement IContainerProviderAccessor;
        // the cast throws InvalidCastException otherwise.
        var containerProvider = (IContainerProviderAccessor)HttpContext.Current.ApplicationInstance;
        _sitemapGenerator = containerProvider.ContainerProvider.ApplicationContainer.Resolve<ISitemapGenerator>();
    }

    /// <summary>
    /// Generates the sitemap and writes it to the response as UTF-8 encoded <c>text/xml</c>.
    /// </summary>
    /// <param name="context">The current request context.</param>
    public void ProcessRequest(HttpContext context)
    {
        var xmlSitemap = _sitemapGenerator.Generate().AsXmlSitemap();

        // Setting up the response
        context.Response.Clear();
        context.Response.ContentType = "text/xml";
        // The sitemap protocol requires UTF-8; do not rely on the site's configured response encoding.
        context.Response.ContentEncoding = System.Text.Encoding.UTF8;
        context.Response.Write(xmlSitemap);
    }

    /// <summary>
    /// Tells ASP.NET that this handler instance can serve further requests.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the handler keeps a single resolved generator, and SitemapGenerator stores the
    /// URI list between Generate() and AsXmlSitemap(). Confirm that sharing it across requests is safe.
    /// </remarks>
    public bool IsReusable
    {
        get { return true; }
    }
}

Tests

Finally the interesting part: validate the XML sitemap against its schema:

/// <summary>
/// Tests for <see cref="SitemapGenerator"/>: plain-text output and validation of the XML output
/// against the sitemaps.org schema.
/// </summary>
/// <remarks>
/// The schema is loaded from its public URL, so the validation tests need network access.
/// </remarks>
[TestFixture]
public class SitemapGeneratorFixture
{
    private const string SitemapNamespace = "http://www.sitemaps.org/schemas/sitemap/0.9";
    private const string SitemapSchemaUrl = "http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd";

    private Mock<IUriRepository> _repository;
    private ISitemapGenerator _sitemapGenerator;

    /// <summary>Creates a fresh repository mock and generator before each test.</summary>
    // Must be public: NUnit rejects the fixture when a [SetUp] method is private.
    [SetUp]
    public void Setup()
    {
        _repository = new Mock<IUriRepository>();
        _sitemapGenerator = new SitemapGenerator(_repository.Object);
    }

    [Test]
    public void ConvertToXmlSitemap_Check_PlainText()
    {
        // Arrange
        GivenSiteUris(
            "http://www.dummysite.com",
            "http://www.dummysite.com/home",
            "http://www.dummysite.com/contacts",
            "http://www.dummysite.com/about",
            "http://www.dummysite.com/portfolio");

        // Act
        var result = _sitemapGenerator.Generate().AsPlainText();

        // Assert
        Assert.That(result.Contains("www.dummysite.com"));
    }

    [Test]
    public void ConvertToXmlSitemap_Check_XmlSitemap_IsValid()
    {
        // Arrange
        GivenSiteUris(
            "http://www.dummysite.com",
            "http://www.dummysite.com/home",
            "http://www.dummysite.com/contacts",
            "http://www.dummysite.com/about",
            "http://www.dummysite.com/portfolio");

        // Act
        var result = _sitemapGenerator.Generate().AsXmlSitemap();

        // Assert
        Assert.IsTrue(CheckIfSchemaIsValid(result));
    }

    [Test]
    [ExpectedException(typeof(XmlSchemaValidationException))]
    public void ConvertToXmlSitemap_Check_XmlSitemap_IsInValid()
    {
        // Arrange: the last entry is empty, which is expected to fail schema validation of <loc>.
        GivenSiteUris(
            "http://www.dummysite.com",
            "http://www.dummysite.com/home",
            "http://www.dummysite.com/contacts",
            "http://www.dummysite.com/about",
            "");

        // Act
        var result = _sitemapGenerator.Generate().AsXmlSitemap();

        // Assert: validation is expected to throw (see the ExpectedException attribute).
        CheckIfSchemaIsValid(result);
    }

    /// <summary>Makes the mocked repository return the given URIs from GetSiteUris.</summary>
    /// <param name="uris">URIs the repository should return, in order.</param>
    private void GivenSiteUris(params string[] uris)
    {
        _repository.Setup(x => x.GetSiteUris()).Returns(new List<string>(uris));
    }

    /// <summary>
    /// Validates the sitemap against the sitemaps.org schema.
    /// </summary>
    /// <param name="xmlContent">Xml sitemap to validate</param>
    /// <returns>True when the sitemap is valid; an invalid sitemap throws instead of returning false.</returns>
    /// <exception cref="XmlSchemaValidationException">The sitemap does not conform to the schema.</exception>
    private bool CheckIfSchemaIsValid(string xmlContent)
    {
        var xdoc = XDocument.Parse(xmlContent);
        var schemas = new XmlSchemaSet();
        schemas.Add(SitemapNamespace, SitemapSchemaUrl);

        // With a null event handler, Validate throws XmlSchemaValidationException on a
        // validation error, so the exception propagates with its original stack trace.
        xdoc.Validate(schemas, null);

        return true;
    }
}

 




No Comments


You can leave the first : )



Leave a Reply

Your email address will not be published. Required fields are marked *