这里我们首先要明白一个基本的原理,搜索引擎的爬行方式。整个互联网就像一张纵横交错的“网”:网的各个节点就是各个网页,而各个网页之间通过url相互连接。蜘蛛可以从一个网页出发,通过该网页上的url,爬到另一个网页;再通过另一个网页上的url,再爬到更多的网页……,以此类推。但如果是一个新发布的网站,可能就没有其他url指向它,那么它就永远不会被“爬到”(收录)。为了解决这个问题,新站可以自己主动向搜索引擎提交url,申请蜘蛛前来抓取(Google申请网址:),但申请时一般只会提交一个主页的url。
为了让所有的url(尤其是动态生成的)都能被蜘蛛快捷便利地检索到,我们就需要提供一个全面完整、架构清晰和更新及时的网站地图。
和处理重复内容的robots.txt文件一样,我们通过.ashx文件来生成一个基于sitemaps.org的xml格式的网站地图。网站地图生成之后,我们就可以向Google等搜索引擎提交。大量的文章证实,提交网站地图将极大地提高网站的收录速度和深度。其他几乎所有的SEO方法,都有可能效果难以证实、失效甚至带来副作用,但提交网站地图除外!
Linq to XML为我们带来了近乎完美的操作体验。
<%@ WebHandler Language="C#" Class="website" %>
using System;
using System.Web;
using System.Xml;
using System.Xml.Linq;
using System.Linq;
/// <summary>
/// HTTP handler that renders the articles returned by Articles.GetArticles()
/// as an XML sitemap following the sitemaps.org protocol.
/// </summary>
public class website : IHttpHandler {
    // Every element of a sitemap must live in this namespace; building the names
    // from an XNamespace makes LINQ to XML emit the xmlns declaration on the root.
    private static readonly XNamespace SitemapNs = "http://www.sitemaps.org/schemas/sitemap/0.9";

    /// <summary>
    /// Writes the XML declaration followed by a urlset element containing one
    /// url entry (loc, lastmod, optional changefreq, priority) per article.
    /// </summary>
    /// <param name="context">The current request context; only its Response is used.</param>
    public void ProcessRequest (HttpContext context) {
        context.Response.ContentType = "text/xml";

        // Declaration of the document. The third argument (standalone = "yes") states
        // that the XML document is self-contained and does not depend on an external DTD.
        XDeclaration declaration = new XDeclaration("1.0", "UTF-8", "yes");
        context.Response.Write(declaration);

        // Root element of the sitemap.
        XElement siteMap = new XElement(SitemapNs + "urlset");

        // Base address of the article page (the same value the rss handler uses).
        string fixedUrl = "http://www.freeflying.com/article";

        // Convert every article into a <url> node.
        foreach (var item in Articles.GetArticles())
        {
            // Escape the catalog so that characters such as '&' or spaces cannot
            // corrupt the query string; XElement takes care of the XML escaping.
            string wholeUrl = string.Format(
                "{0}?id={1}&catelog={2}",
                fixedUrl,
                item.ID,
                Uri.EscapeDataString(item.Catelog ?? string.Empty));

            XElement url = new XElement(SitemapNs + "url");
            url.Add(new XElement(SitemapNs + "loc", wholeUrl));

            // The protocol requires W3C Datetime; emit the culture-independent
            // YYYY-MM-DD form of the article's own modification date.
            url.Add(new XElement(
                SitemapNs + "lastmod",
                item.LastMod.ToString("yyyy-MM-dd", System.Globalization.CultureInfo.InvariantCulture)));

            // "none" is not a changefreq value defined by the protocol and the
            // element is optional, so it is left out for such articles.
            if (item.Frequency != Freq.none)
            {
                url.Add(new XElement(SitemapNs + "changefreq", item.Frequency));
            }

            url.Add(new XElement(SitemapNs + "priority", item.Weight));
            siteMap.Add(url);
        }

        // Finally output the whole XML document.
        context.Response.Write(siteMap);
    }

    /// <summary>
    /// Always false: a new handler instance is requested for every request.
    /// </summary>
    public bool IsReusable {
        get {
            return false;
        }
    }
}
同样使用到xml技术的还有RSS
<%@ WebHandler Language="C#" Class="rss" %>
using System;
using System.Web;
using System.Xml;
using System.Xml.Linq;
/// <summary>
/// HTTP handler that renders the articles returned by Articles.GetArticles()
/// as an RSS 2.0 feed.
/// </summary>
public class rss : IHttpHandler {
    /// <summary>
    /// Writes the XML declaration followed by an rss element whose single channel
    /// holds the feed metadata and one item per article.
    /// </summary>
    /// <param name="context">The current request context; only its Response is used.</param>
    public void ProcessRequest (HttpContext context) {
        context.Response.ContentType = "text/xml";
        context.Response.Write("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>");

        XElement rssFeed = new XElement("rss", new XAttribute("version","2.0"));
        string fixedUrl = "http://www.freeflying.com/article";

        XElement channel = new XElement("channel",
            new XElement("title", "freeflying"),
            new XElement("link", fixedUrl),
            new XElement("description","the website for dream flying freely"),
            new XElement("pubDate", ToRfc822(DateTime.Now))
            );

        foreach (var article in Articles.GetArticles())
        {
            // Escape the catalog so that characters such as '&' or spaces cannot
            // corrupt the query string; XElement takes care of the XML escaping.
            string wholeUrl = string.Format(
                "{0}?id={1}&catelog={2}",
                fixedUrl,
                article.ID,
                Uri.EscapeDataString(article.Catelog ?? string.Empty));

            XElement item = new XElement("item",
                new XElement("title", article.Title),
                new XElement("link", wholeUrl),
                new XElement("description", article.Description),
                new XElement("pubDate", ToRfc822(article.LastMod)));
            channel.Add(item);
        }

        rssFeed.Add(channel);
        context.Response.Write(rssFeed);
    }

    /// <summary>
    /// Formats a date the way RSS 2.0 requires (RFC 822/1123, e.g.
    /// "Sat, 07 Sep 2002 09:42:31 GMT").
    /// </summary>
    /// <param name="value">The date to format.</param>
    /// <returns>The RFC 1123 representation of <paramref name="value"/> in UTC.</returns>
    private static string ToRfc822(DateTime value) {
        // The "r" specifier always appends "GMT" but performs no conversion itself,
        // so the value has to be converted to UTC first.
        return value.ToUniversalTime().ToString("r", System.Globalization.CultureInfo.InvariantCulture);
    }

    /// <summary>
    /// Always false: a new handler instance is requested for every request.
    /// </summary>
    public bool IsReusable {
        get {
            return false;
        }
    }
}
模拟数据
using System;
using System.Data;
using System.Configuration;
using System.Linq;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.HtmlControls;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Xml.Linq;
using System.Web.UI.MobileControls;
using System.Collections.Generic;
/// <summary>
/// Mock data source that supplies a fixed set of sample articles.
/// </summary>
public class Articles
{
    /// <summary>
    /// Creates an instance; the class holds no state, all data comes from
    /// the static <see cref="GetArticles"/> method.
    /// </summary>
    public Articles()
    {
    }

    /// <summary>
    /// Builds the list of sample articles. Modification dates are computed
    /// relative to the current time on every call.
    /// </summary>
    /// <returns>A new list containing three sample articles.</returns>
    public static List<Article> GetArticles()
    {
        return new List<Article>()
        {
            new Article(234, "blog", DateTime.Now.AddDays(-23), Freq.none, 0.8, "asp.net seo", "articles about SEO in asp.net"),
            new Article(267, "blog", DateTime.Now.AddDays(-245), Freq.daily, 0.6, "ado.net pro","about the dataset usage"),
            new Article(653, "news", DateTime.Now.AddDays(-45), Freq.daily, 1,"CLR via C#","notebook about this book")
        };
    }
}
/// <summary>
/// Plain data holder describing one article: its identity, catalog,
/// modification date, weight, change frequency, title and description.
/// </summary>
public class Article
{
    // Identity and classification.
    public int ID;
    public string Catelog;

    // Change tracking and ranking information.
    public DateTime LastMod;
    public double Weight;
    public Freq Frequency;

    // Human-readable texts.
    public string Title;
    public string Description;

    /// <summary>
    /// Creates an article and stores every argument in the field of the same name.
    /// </summary>
    /// <param name="id">Value for <see cref="ID"/>.</param>
    /// <param name="catelog">Value for <see cref="Catelog"/>.</param>
    /// <param name="lastMod">Value for <see cref="LastMod"/>.</param>
    /// <param name="frequency">Value for <see cref="Frequency"/>.</param>
    /// <param name="weight">Value for <see cref="Weight"/>.</param>
    /// <param name="title">Value for <see cref="Title"/>.</param>
    /// <param name="description">Value for <see cref="Description"/>.</param>
    public Article(int id, string catelog, DateTime lastMod, Freq frequency, double weight, string title, string description)
    {
        this.ID = id;
        this.Catelog = catelog;
        this.LastMod = lastMod;
        this.Frequency = frequency;
        this.Weight = weight;
        this.Title = title;
        this.Description = description;
    }
}
/// <summary>
/// How often an article is expected to change. The website handler uses
/// this value for the changefreq element of a sitemap entry.
/// </summary>
public enum Freq : int
{
    /// <summary>No change frequency specified.</summary>
    none = 1,

    /// <summary>Changes daily.</summary>
    daily = 2,

    /// <summary>Changes weekly.</summary>
    weekly = 3
}