XQuery/BBC 天气预报
外观
< XQuery
BBC 以 RSS 形式提供部分天气数据,目前包括当前状况和 3 天预报。由于缺少描述天气属性的标准标签集,天气状况以字符串形式给出,需要解析字符串才能取得各项数据。
其他预报(例如 24 小时和 5 天预报)不以 RSS 形式提供,因此我们必须抓取 HTML 页面。
完成此任务的一种方法是使用这个 Yahoo Pipes(雅虎管道),它可以将页面转换为 RSS 提要。不过,把数据转换为 XML 元素会更有用。
在所有这些页面和提要中,都存在一个问题,即为预报或观测分配日期。日期经常被省略或表示为星期几。这会导致处理 RSS 和 HTML 页面时出现复杂情况。
此脚本使用 eXist 模块 httpclient 获取 HTML,解析 HTML 并生成 XML 文件。然后,可以通过 XSLT 将此 XML 转换为可查看的页面。
此脚本有两个参数:
- region - 必填 - BBC 独有的数字区域代码(代码列表待补充);脚本在未提供该参数时使用默认值 "2"
- area - 可选 - 子区域,通常是邮政编码的开头
declare namespace h ="http://www.w3.org/1999/xhtml";
(:~
 : Returns the three-letter English abbreviation of the weekday on which
 : a date falls ('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri' or 'Sat').
 :
 : The weekday is derived from the number of whole days between $date and
 : 1901-01-06, which was a Sunday.
 :
 : @param $date an xs:date; any timezone on it is ignored so that the
 :              weekday is that of the calendar date as written
 : @return the weekday abbreviation, or the empty sequence if $date is empty
 :)
declare function local:day-of-week($date) {
  let $names := ('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat')
  (: Drop the timezone so the difference below is an exact number of days;
     otherwise a zone offset leaves a fraction that xs:integer truncates. :)
  let $plain-date := adjust-date-to-timezone($date, ())
  let $days := xs:integer(($plain-date - xs:date('1901-01-06'))
                          div xs:dayTimeDuration('P1D'))
  (: "mod" keeps the sign of the dividend, so normalise into 0..6 to make
     dates before the reference Sunday work as well. :)
  let $offset := (($days mod 7) + 7) mod 7
  return $names[$offset + 1]
};
(: Main query: fetch the BBC forecast HTML page for a region (and optional
   area) and re-express each forecast table row as a <forecast> element
   inside a single <forecasts> element.

   Request parameters:
     region - BBC region code; defaults to "2" when absent
     area   - optional sub-area; only added to the URL when supplied :)
let $area := request:get-parameter("area",())
let $region := request:get-parameter("region","2")
(: Both values come straight from the HTTP request, so percent-encode them
   before placing them in the URL. The ampersand must be written as an
   entity reference inside an XQuery string literal. :)
let $area-query :=
    if (exists($area))
    then concat("&amp;area=", encode-for-uri($area))
    else ""
let $url := concat("http://news.bbc.co.uk/weather/forecast/",
                   encode-for-uri($region), "?state=fo:B", $area-query)
(: NOTE(review): the second argument is presumably eXist's "persist" flag and
   the third the optional request headers - confirm against the httpclient
   module documentation. :)
let $doc := httpclient:get(xs:anyURI($url),false(),())
let $currentDate := current-date()
let $dow := local:day-of-week($currentDate)
return
element forecasts {
    element region {$region},
    if (exists($area)) then element area {$area} else (),
    element source {"BBC"},
    for $row in $doc/httpclient:body//h:table/h:tbody/h:tr
    (: The first cell holds the time, optionally followed by a space and a
       weekday in parentheses; keep only the part before the first space. :)
    let $raw-time := normalize-space($row/h:td[1])
    let $clock :=
        if (contains($raw-time," "))
        then substring-before($raw-time," ")
        else $raw-time
    let $time-text := concat($clock,":00")
    (: Rows whose first cell is not a time would make xs:time() raise an error
       and abort the whole query, so they yield no time here and are
       filtered out by the where clause below. :)
    let $time :=
        if ($time-text castable as xs:time)
        then xs:time($time-text)
        else ()
    (: A weekday in parentheses names the forecast's day; without one the
       row is taken to be for today. :)
    let $pdow :=
        if (contains($raw-time,"("))
        then substring-before(substring-after($raw-time,"("),")")
        else $dow
    (: Any weekday other than today's is treated as tomorrow. :)
    let $date :=
        if ($pdow ne $dow)
        then $currentDate + xs:dayTimeDuration("P1D")
        else $currentDate
    where exists($time)
    return
        element forecast {
            element date {$date},
            element time {$time},
            element dow {$pdow},
            element summary {string($row/h:td[2]//h:p[@class="sum"])},
            element imageurl {string($row/h:td[2]//h:div[@class="summary"]//h:img/@src)},
            (: Temperature and wind speed are each emitted twice, once per unit. :)
            element maxTemp {attribute units {"degc"}, $row/h:td[3]//h:span[@class="cent"]/text()},
            element maxTemp {attribute units {"degf"}, $row/h:td[3]//h:span[contains(@class,"fahr")]/text()},
            element windDirection {string($row/h:td[4]//h:span[contains(@class,"wind")]/@title)},
            element windSpeed {attribute units {"mph"}, substring-before($row/h:td[4]//h:span[contains(@class,"mph")], "mph")},
            element windSpeed {attribute units {"kph"}, substring-before($row/h:td[4]//h:span[contains(@class,"kph")], "km/h")},
            element humidity {attribute units {"%"}, normalize-space(substring-before($row/h:td[5]//h:span[contains(@class,"hum")], "%"))},
            element pressure {attribute units {"mb"}, normalize-space(substring-before($row/h:td[5]//h:span[@class="pres"], "mB"))},
            element visibility {normalize-space($row/h:td[5]//h:span[contains(@class,"vis")])}
        }
}