#! /usr/bin/env ruby
#
# Small HTTP service that scrapes a Skyrock blog and republishes its articles
# as an RSS 1.0 feed:
#
#   GET http://localhost:2000/rss?u=<blog pseudo>
#
# Nokogiri hands back UTF-8 strings and the literals in this file are UTF-8 as
# well, so no transcoding step is needed before matching dates (the former
# Iconv dependency left the standard library in Ruby 2.0).

require "rubygems"
require "nokogiri"
require "open-uri"
require "erb"
require "rss/maker"
require "webrick"

# One scraped article. A Hash filled in by Skyrockblog#parse_page with the keys
# :id, :title, :images_urls, :texts, :created_on and :permalink.
class SkyrockblogArticle < Hash
  # Renders the article body as an XHTML fragment: one <img> element per entry
  # of self[:images_urls] followed by one <p> element per entry of
  # self[:texts], one element per line.
  #
  # URLs and texts are HTML-escaped because they are plain strings taken from
  # the scraped page, not markup.
  #
  # @return [String] the fragment, terminated by a newline
  def to_xhtml
    images = Array(self[:images_urls]).compact.map do |url|
      %(<img src="#{ERB::Util.html_escape(url)}" alt="" />)
    end
    paragraphs = Array(self[:texts]).map do |text|
      "<p>#{ERB::Util.html_escape(text)}</p>"
    end

    (images + paragraphs).join("\n") + "\n"
  end
end

# The articles found on one page of a blog, in document order.
class SkyrockblogPage < Array; end

# Scraper for a single Skyrock blog, identified by its owner's pseudo.
#
# The most recently downloaded page is cached in @doc / @page, so metadata
# readers (#title, #description, #user_id, #pagination) reuse whatever page is
# already loaded instead of triggering another download.
class Skyrockblog
  # Raised when the downloaded HTML lacks a piece of data the scraper needs.
  class ParseError < StandardError; end

  FRENCH_MONTHS = {
    "janvier"   => 1,
    "février"   => 2,
    "mars"      => 3,
    "avril"     => 4,
    "mai"       => 5,
    "juin"      => 6,
    "juillet"   => 7,
    "août"      => 8,
    "septembre" => 9,
    "octobre"   => 10,
    "novembre"  => 11,
    "décembre"  => 12
  }.freeze

  # Captures day, month name, year, hour and minute. The "." after "Post"
  # presumably stands for the accented letter of "Posté".
  DATE_PATTERN = %r{Post. le \S+ (\d+) (\S+) (\d+)\s+(\d+):(\d+)}

  # Captures the numeric value assigned to id_skynaute in the page text.
  USER_ID_PATTERN = /id_skynaute\s*=\s*"?(\d+)"?/

  attr_reader :base_url

  # @param user_name [#to_s] blog owner's pseudo; form-encoded before being
  #   placed in the query string since it comes straight from the HTTP request
  def initialize(user_name)
    pseudo = URI.encode_www_form_component(user_name.to_s)
    @base_url = "http://www.skyrock.com/blog/blog.php?pseudo=#{pseudo}&__FORCE_LANG=fr_FX"
    @doc = nil
    @page = nil
    @user_id = nil
  end

  # Downloads (unless cached) and parses one page of the blog.
  #
  # @param page [Integer] 1-based page number
  # @return [SkyrockblogPage] the articles of that page; ".bloc" elements that
  #   are not articles (see #parse_article) are left out
  def parse_page(page = 1)
    fetch_page(page)

    @doc.css(".bloc").each_with_object(SkyrockblogPage.new) do |node, articles|
      article = parse_article(node)
      articles << article if article
    end
  end

  # Page numbers advertised by the pagination links of the loaded page.
  #
  # @return [Range] 1..last, where last is the highest "<n>.html" link target
  #   found under "ul.pagination"; 1..1 when there is no pagination list
  def pagination
    fetch_page(false)

    pager = @doc.at("ul.pagination")
    return 1..1 unless pager

    linked_pages = pager.css("a").map { |link| link["href"].to_s[/(\d+)\.html$/, 1] }
    1..([1] + linked_pages.compact.map(&:to_i)).max
  end

  # Numeric id of the blog owner, read from the id_skynaute assignment in the
  # page text. Memoized because #permalink needs it once per article.
  #
  # @return [String] the id digits
  # @raise [ParseError] when the page contains no id_skynaute assignment
  def user_id
    return @user_id if @user_id

    fetch_page(false)
    @user_id = @doc.text[USER_ID_PATTERN, 1]
    raise ParseError, "no id_skynaute found on #{base_url}" unless @user_id

    @user_id
  end

  # @return [String, nil] text of the page's <title> element, nil if absent
  def title
    fetch_page(false)
    @doc.at("title")&.text
  end

  # @return [String, nil] text of the first ".description" element, nil if absent
  def description
    fetch_page(false)
    @doc.at(".description")&.text
  end

  # Collects articles page by page until at least nb_max have been gathered.
  #
  # Pages are walked from the last one down to the first when page 1 lists
  # its first article as older than its second (or has fewer than two
  # articles to compare), and from the first one up otherwise.
  #
  # @param nb_max [Integer] number of articles after which no further page is read
  # @return [Array<SkyrockblogArticle>] whole pages are appended, so the result
  #   may hold more than nb_max articles
  def fetch_articles(nb_max)
    first_page = parse_page(1)
    pages = [*pagination]

    start_from_last_page =
      first_page.size < 2 || first_page[0][:created_on] < first_page[1][:created_on]
    pages.reverse! if start_from_last_page

    articles = []
    pages.each do |page|
      articles.concat(parse_page(page))
      break if articles.size >= nb_max
    end
    articles
  end

  # Builds an RSS 1.0 feed out of the articles returned by fetch_articles(15).
  #
  # @return [Object] whatever RSS::Maker.make("1.0") returns for the feed
  def rss
    articles = fetch_articles(15)
    feed_title = title
    feed_description = description || feed_title

    RSS::Maker.make("1.0") do |r|
      r.encoding = "UTF-8"
      r.channel.title = feed_title
      r.channel.link = base_url
      r.channel.description = feed_description
      # rdf:about identifies the channel and has to be a URI, not free text.
      r.channel.about = base_url
      r.items.do_sort = true

      articles.each do |article|
        i = r.items.new_item
        i.title = article[:title]
        i.date = article[:created_on]
        i.link = article[:permalink]
        i.description = article[:texts].first
        i.content_encoded = article.to_xhtml
      end
    end
  end

  protected

  # Converts the text of a ".created_on" element into a Time.
  #
  # @param skyrockblog_date [String] text matching DATE_PATTERN, with a month
  #   name listed in FRENCH_MONTHS
  # @return [Time] local time built from the captured fields
  # @raise [ParseError] when the text does not match or the month is unknown
  def parse_skyrockblog_date(skyrockblog_date)
    matches = DATE_PATTERN.match(skyrockblog_date.to_s)
    raise ParseError, "unrecognised date: #{skyrockblog_date.inspect}" unless matches

    day, month_name, year, hour, min = matches.captures
    month = FRENCH_MONTHS[month_name]
    raise ParseError, "unknown month #{month_name.inspect}" unless month

    # to_i rather than Integer(): captures such as "08" must not be read as octal.
    Time.local(year.to_i, month, day.to_i, hour.to_i, min.to_i)
  end

  # @param page [Integer] 1-based page number
  # @return [String] base_url for page 1 or below, base_url plus "&page=N" otherwise
  def page_url(page)
    return @base_url if page <= 1

    "#{@base_url}&page=#{page}"
  end

  # Makes sure @doc holds a parsed page.
  #
  # @param page [Integer, false] page to load; false means "any page will do":
  #   the cached document is kept if there is one, page 1 is loaded otherwise
  def fetch_page(page)
    return if @doc && (!page || @page == page)

    target = page || 1
    # Block form so the downloaded body is closed as soon as it is parsed.
    @doc = URI.parse(page_url(target)).open { |io| Nokogiri::HTML(io) }
    # Record the page really loaded so a later fetch_page(1) hits the cache.
    @page = target
  end

  # @param id [String] article id (element id without its "a-" prefix)
  # @return [String] direct link built from the article id and the blog owner's id
  def permalink(id)
    "http://www.skyrock.com/direct.php/#{id}:#{user_id}"
  end

  # Turns one ".bloc" element into an article.
  #
  # @param node [Nokogiri::XML::Node] candidate element
  # @return [SkyrockblogArticle, nil] nil when the element's id does not start
  #   with "a-" or when it has no <h2> or no ".created_on" child
  def parse_article(node)
    dom_id = node["id"]
    return unless dom_id&.start_with?("a-")

    heading = node.at("h2")
    stamp = node.at(".created_on")
    return unless heading && stamp

    id = dom_id.delete_prefix("a-")
    text_blocks = node.css("div").select { |div| div["class"] == "text-container" }

    SkyrockblogArticle.new.update(
      id: id,
      title: heading.inner_text,
      images_urls: node.css("img").map { |image| image["src"] }.compact,
      texts: text_blocks.map(&:inner_text),
      created_on: parse_skyrockblog_date(stamp.inner_text),
      permalink: permalink(id)
    )
  end
end

# Serves the feed of the blog named by the "u" query parameter.
class RSSServlet < WEBrick::HTTPServlet::AbstractServlet
  # Responds with the feed XML, or with:
  # * 412 when "u" is missing or empty,
  # * 404 when the blog page itself answers 404,
  # * 502 for any other HTTP error returned while fetching the blog.
  def do_GET(req, res)
    user_name = req.query["u"]
    if user_name.nil? || user_name.empty?
      res.status = 412
      return
    end

    res.body = Skyrockblog.new(user_name).rss.to_xml
    res["Content-Type"] = "text/xml"
  rescue OpenURI::HTTPError => e
    res.status = e.io.status.first == "404" ? 404 : 502
  end
end

server = WEBrick::HTTPServer.new(Port: 2000)
trap("INT") { server.shutdown }
server.mount("/rss", RSSServlet)
server.start