XQuery/英国航运预报
英国航运预报由英国气象局每天发布四次,并通过广播、气象局网站以及 BBC 网站(现已不再提供)发布。但是,它并不以计算机可读的形式提供。
Tim Duckett 最近在博客中谈到如何创建推特流。他使用 Ruby 解析文本预报。文本形式的预报可以在气象局和 BBC 网站上找到。但是正如 Tim 指出的那样,这种格式是为语音播报设计的,它把预报相似的海区合并在一起以缩短播报时间,因此难以解析。这里采用的方法是抓取包含原始区域预报数据的 JavaScript 文件。
以下脚本使用这些 eXist 模块
- request - 获取 HTTP 请求参数
- httpclient - GET 和 POST
- scheduler - 调度抓取任务
- dateTime - 格式化日期时间
- util - base64 转换
- xmldb - 用于数据库访问
英国气象局网页
[这种方法不再可行,因为检索到的 JavaScript 文件不再更新]
气象局网页显示了逐区域预报,但网页的这部分是由 JavaScript 从生成的 JavaScript 文件中的数据生成的。在该文件中,数据分配给多个数组。典型部分如下所示
// Bailey
gale_in_force[28] = "0";
gale[28] = "0";
galeIssueTime[28] = "";
shipIssueTime[28] = "1725 Sun 06 Jul";
wind[28] = "Northeast 5 to 7.";
weather[28] = "Showers.";
visibility[28] = "Moderate or good.";
seastate[28] = "Moderate or rough.";
area[28] = "Bailey";
area_presentation[28] = "Bailey";
key[28] = "Bailey";
// Faeroes
...
第一个函数使用 eXist httpclient 模块获取当前 JavaScript 数据,并将 base64 数据转换为字符串
xquery version "3.0";
declare namespace httpclient = "http://exist-db.org/xquery/httpclient";
declare namespace met = "http://kitwallace.co.uk/wiki/met";
declare variable $met:javascript-file := "http://www.metoffice.gov.uk/lib/includes/marine/gale_and_shipping_table.js";
declare function met:get-forecast() as xs:string {
    (: Download the JavaScript data file; the body of the response arrives base64 encoded. :)
    let $response := httpclient:get(xs:anyURI($met:javascript-file), true(), ())
    let $encoded-body := $response/httpclient:body/text()
    return
        (: Decode the base64 payload back into the JavaScript source text. :)
        util:binary-to-string($encoded-body)
};
第二个函数从 JavaScript 中选取一个区域预报,并解析代码以使用 JavaScript 数组名称生成 XML 结构。
declare function met:extract-forecast($js as xs:string, $area as xs:string) as element(forecast)? {
    (: The statements for an area sit between its "// <area>" comment and the next "//". :)
    let $marker := concat("// ", $area)
    let $after-marker := substring-after($js, $marker)
    let $section := normalize-space(substring-before($after-marker, "//"))
    return
        if ($section != "")
        then
            (: One child element per assignment, named after the JavaScript array. :)
            <forecast>{
                (: Statements end with ";", so the token after the final ";" is empty and is dropped. :)
                let $statements := tokenize($section, ";")
                for $statement in subsequence($statements, 1, count($statements) - 1)
                (: Split the assignment into its left-hand side and its (still half-quoted) right-hand side. :)
                let $sides := tokenize(normalize-space($statement), ' *= *"')
                (: Array name without subscript and underscores becomes the element name. :)
                let $element-name := replace(substring-before($sides[1], "["), "_", "")
                (: Strip the remaining quote, then any embedded markup (seen in shipIssueTime). :)
                let $unquoted := replace($sides[2], '"', '')
                let $text := replace($unquoted, "<.*>", "")
                return
                    element { $element-name } { $text }
            }</forecast>
        else ()  (: area not found :)
};
要获取区域预报
(: Fetch the current JavaScript data and pull out the Fastnet forecast in one step. :)
met:extract-forecast(met:get-forecast(), "Fastnet")
例如,一个选定区域的输出为
<forecast>
<galeinforce>0</galeinforce>
<gale>0</gale>
<galeIssueTime/>
<shipIssueTime>1030 Tue 28 Oct</shipIssueTime>
<wind>Southwest veering northeast, 5 or 6.</wind>
<weather>Rain at times.</weather>
<visibility>Good, occasionally poor.</visibility>
<seastate>Moderate or rough.</seastate>
<area>Fastnet</area>
<areapresentation>Fastnet</areapresentation>
<key>Fastnet</key>
</forecast>
需要将预报数据格式化为字符串
declare function met:forecast-as-text($forecast as element(forecast)) as xs:string {
    (: Pick out the four items that make up the spoken-style summary. :)
    let $weather := $forecast/weather
    let $wind := $forecast/wind
    let $visibility := $forecast/visibility
    let $sea := $forecast/seastate
    return
        (: Weather comes first unlabelled; the other three are each introduced by a label. :)
        concat($weather, " Wind ", $wind, " Visibility ", $visibility, " Sea ", $sea)
};
(: Extract the Fastnet forecast from freshly fetched data and wrap its text form in a report element. :)
let $fastnet := met:extract-forecast(met:get-forecast(), "Fastnet")
return
    <report>{ met:forecast-as-text($fastnet) }</report>
返回值
<report>Rain at times. Wind Southwest veering northeast, 5 or 6. Visibility Good, occasionally poor. Sea Moderate or rough.</report>
最后,这些函数可以在一个脚本中使用,该脚本接受航运区域名称并返回 XML 消息
(: The sea area comes from the "area" request parameter (default: Lundy).
   Previously $area was referenced in the result but never bound, and the
   forecast was always extracted for a hard-coded area. :)
let $area := request:get-parameter("area", "Lundy")
let $js := met:get-forecast()
let $forecast := met:extract-forecast($js, $area)
return
    (: dateTime carries the issue time exactly as it appears in the source data. :)
    <message area="{$area}" dateTime="{$forecast/shipIssueTime}">
        {met:forecast-as-text($forecast)}
    </message>
要创建适合短信(160 个字符)或推特(140 个字符限制)的消息,可以通过缩写常用词来压缩消息。
创建并本地存储词语和缩写的字典。该字典是在 Tim Duckett 的 Ruby 实现中的一些缩写的基础上开发的。
<dictionary>
<entry full="west" abbrev="W"/>
<entry full="westerly" abbrev="Wly"/>
..
<entry full="variable" abbrev="vbl"/>
<entry full="visibility" abbrev="viz"/>
<entry full="occasionally" abbrev="occ"/>
<entry full="showers" abbrev="shwrs"/>
</dictionary>
缩写函数将文本分解为词语,用缩写替换词语,然后重新构建文本
declare function met:abbreviate($forecast as xs:string) as xs:string {
    string-join(
        (: Lower-case the text, append a space so that a final "." is also consumed by the
           separator pattern, split into words, and drop the empty token the split leaves
           behind (it used to produce a trailing space in the result). :)
        for $token in tokenize(concat(lower-case($forecast), " "), "\.? +")[. ne ""]
        (: A trailing comma is set aside for the dictionary lookup and put back afterwards;
           otherwise words such as "northeast," or "good," would never be abbreviated. :)
        let $comma := if (ends-with($token, ",")) then "," else ""
        let $word := substring($token, 1, string-length($token) - string-length($comma))
        return
            (: Use the abbreviation when the dictionary has an entry for the word, else the word itself.
               NOTE(review): the absolute path presumably resolves against the stored dictionary
               document in the database - confirm for the deployment. :)
            concat((/dictionary/entry[@full = $word]/@abbrev, $word)[1], $comma)
        ,
        " ")  (: join the words back up with a single space :)
};
import module namespace met = "http://www.cems.uwe.ac.uk/xmlwiki/met" at "met.xqm";
(: Requested sea area, defaulting to Lundy. :)
let $area := request:get-parameter("area","Lundy")
(: NOTE(review): a one-argument met:get-forecast is not among the functions shown alongside
   this script - presumably supplied by met.xqm; confirm. :)
let $area-forecast := met:get-forecast($area)
(: Compress the text form of the forecast using the abbreviation dictionary. :)
let $short-text := met:abbreviate(met:forecast-as-text($area-forecast))
return
    <message area="{$area}" dateTime="{$area-forecast/shipIssueTime}">{ $short-text }</message>
此函数是区域预报的扩展。解析使用注释分隔符来分割脚本,忽略第一个和最后一个部分以及注释中的区域名称
declare function met:get-forecast() as element(forecast)* {
    (: Fetch the JavaScript data file and decode its base64 body to text. :)
    let $source-uri := "http://www.metoffice.gov.uk/lib/includes/marine/gale_and_shipping_table.js"
    let $encoded := httpclient:get(xs:anyURI($source-uri), true(), ())/httpclient:body/text()
    let $script := util:binary-to-string($encoded)
    (: Comment markers split the script into sections; the first and the last section are ignored. :)
    let $sections := tokenize($script, "// ")
    for $section in subsequence($sections, 2, count($sections) - 2)
    (: Skip the area name from the comment: keep the text from the first "gale" onwards. :)
    let $assignments := concat("gale", substring-after($section, "gale"))
    return
        <forecast>{
            (: One element per assignment statement; the token after the final ";" is dropped. :)
            let $statements := tokenize($assignments, ";")
            for $statement in $statements[position() < last()]
            let $parts := tokenize(normalize-space($statement), " *= *")
            let $name := replace(substring-before($parts[1], "["), "_", "")
            return
                element { $name } { replace($parts[2], '"', '') }
        }</forecast>
};
此脚本返回完整的航运预报的 XML 版本。
import module namespace met = "http://www.cems.uwe.ac.uk/xmlwiki/met" at "met.xqm";
(: Wrap every area forecast in a single ShippingForecast document element. :)
element ShippingForecast { met:get-forecast() }
XSLT 适合将此 XML 转换为 RSS 格式……
此数据的一种可能用途是提供一个按需短信服务,用户输入区域名称,系统返回简化的预报。生成完整的预报集,选择用户提供的区域的预报,并以简化的信息形式返回。
import module namespace met = "http://www.cems.uwe.ac.uk/xmlwiki/met" at "met.xqm";
(: The texted area name is compared case-insensitively with each forecast's area element. :)
let $requested := lower-case(request:get-parameter("text",()))
let $match := met:get-forecast()[lower-case(area) = $requested]
return
    if (empty($match))
    then
        concat("Reply: Area ",$requested," not recognised")
    else
        (: Reply with the abbreviated text form of the matching forecast. :)
        concat("Reply: ", met:abbreviate(met:forecast-as-text($match)))
短信服务的调用协议由安装在 UWE 的短信服务确定,其描述在此。
按需获取 JavaScript 既不高效,也不符合良好的网络礼仪。由于预报的发布时间是已知的,最好按照预定的时间表获取数据,转换为 XML 格式并保存到 eXist 数据库中,然后用缓存的 XML 响应后续请求。
import module namespace met = "http://www.cems.uwe.ac.uk/xmlwiki/met" at "met.xqm";
declare variable $col := "/db/Wiki/Met/Forecast";

(: Log in as a user with write access to the Forecast collection; do nothing if the login fails. :)
if (not(xmldb:login($col, "user", "password")))
then ()
else
    let $forecasts := met:get-forecast()
    (: The issue time of the first area is used as the timestamp of the whole forecast. :)
    let $issued := met:timestamp-to-xs-date(($forecasts/shipIssueTime)[1])
    let $document :=
        <ShippingForecast at="{$issued}" >
            {$forecasts}
        </ShippingForecast>
    (: Always the same resource name: only the latest forecast is kept, so overwriting is intended. :)
    let $stored-path := xmldb:store($col, "shippingForecast.xml", $document)
    return
        <result>
Shipping forecast for {string($issued)} stored in {$stored-path}
</result>
源数据中使用的时间戳需要转换为 xs:dateTime,以便于后续处理。
declare function met:timestamp-to-xs-date($dt as xs:string) as xs:dateTime {
    (: Convert a source timestamp of the form "0505 Tue 08 Jul" (hhmm, weekday, day, month)
       to xs:dateTime. The weekday is not used. A timestamp that does not follow this shape
       makes the final xs:dateTime cast fail. :)
    let $months := ("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec")
    (: No year is supplied, so start from the current year. :)
    let $year := year-from-date(current-date())
    (: Collapse stray or repeated whitespace before splitting, so the fields keep their positions. :)
    let $parts := tokenize(normalize-space($dt), " ")
    let $hhmm := $parts[1]
    (: Zero-pad a single-digit day so the lexical form is valid. :)
    let $day := if (string-length($parts[3]) = 1) then concat("0", $parts[3]) else $parts[3]
    let $mon := index-of($months, $parts[4])
    let $monno := if ($mon < 10) then concat("0", $mon) else $mon
    let $stamp :=
        xs:dateTime(concat(
            $year, "-", $monno, "-", $day,
            "T", substring($hhmm, 1, 2), ":", substring($hhmm, 3, 2), ":00"))
    return
        (: A forecast cannot be issued in the future: a stamp more than a day ahead means a
           late-December forecast is being read after New Year, so it belongs to last year. :)
        if ($stamp > current-dateTime() + xs:dayTimeDuration("P1D"))
        then $stamp - xs:yearMonthDuration("P1Y")
        else $stamp
};
原始数据包含冗余元素(区域名称的多个版本)和通常为空的元素(在没有大风预警的情况下,所有与大风相关的元素),但缺少用作键的规范化区域名称。以下函数执行此重构。
declare function met:reduce($forecast as element(forecast)) as element(forecast) {
    (: Items that never survive: the issue time and two of the area-name variants. :)
    let $always-dropped := ("shipIssueTime","area","key")
    (: Items that are only kept while a gale warning is in force. :)
    let $gale-items := ("galeinforce","gale","galeIssueTime")
    let $no-gale := $forecast/galeinforce = "0"
    return
        element forecast {
            (: The lower-cased area name becomes the key attribute. :)
            attribute area { lower-case($forecast/area) },
            for $item in $forecast/*
            where not(name($item) = $always-dropped)
              and (not($no-gale) or not(name($item) = $gale-items))
            return $item
        }
};
可以使用 XSLT 进行此转换,缓存脚本在保存预报之前会应用此转换。
修改后的短信脚本现在可以访问缓存。首先是一个获取存储的预报的函数。
declare function met:get-stored-forecast($area as xs:string) as element(forecast)? {
    (: Look up one area's forecast in the cached document by its area attribute.
       The result is optional: an unknown area yields the empty sequence, so callers can
       test it with exists() instead of hitting a type error on the return value. :)
    doc("/db/Wiki/Met/Forecast/shippingForecast.xml")/ShippingForecast/forecast[@area = $area]
};
import module namespace met = "http://www.cems.uwe.ac.uk/xmlwiki/met" at "met.xqm";
(: Normalise the texted area name: collapse whitespace and lower-case it before the lookup.
   The standard function is normalize-space; the former "normalise-space" spelling is not a
   defined function and stopped the script from compiling. :)
let $area := lower-case(normalize-space(request:get-parameter("text",())))
let $forecast := met:get-stored-forecast($area)
return
    if (exists($forecast))
    then
        (: The reply starts with the issue time, read from the @at attribute of the parent element. :)
        concat("Reply: ", datetime:format-dateTime($forecast/../@at,"HH:mm")," ",met:abbreviate(met:forecast-as-text($forecast)))
    else
        concat("Reply: Area ",$area," not recognised")
在此脚本中,通过 met 函数调用取得的所选区域预报是对数据库中元素的引用,而不是副本。因此,仍然可以向上导航到包含时间戳的父元素。
eXist 的 datetime 函数是 Java 类 java.text.SimpleDateFormat 的包装器,它定义了日期格式化语法。
eXist 包含一个调度程序模块,它是 Quartz 调度程序 的包装器。作业只能由 DBA 用户创建。
例如,要设置一个每小时获取航运预报的作业,
(: Log in as the admin user, then register the fetch-and-save script as a cron job. :)
let $login := xmldb:login( "/db", "admin", "admin password" )
return
    scheduler:schedule-xquery-cron-job("/db/Wiki/Met/getandsave.xq" , "0 0 * * * ?")
其中 "0 0 * * * ?" 表示在每月的每一天的每小时的 0 秒 0 分运行,忽略星期几。
要检查已安排的作业集(包括系统调度作业),
(: Log in as the admin user before asking the scheduler for its job list. :)
let $authenticated := xmldb:login( "/db", "admin", "admin password" )
return
    scheduler:get-scheduled-jobs()
最好根据预报的更新时间表安排作业。这些时间是 0015、0505、1130 和 1725。这些时间无法拟合到单个 cron 模式中,因此需要多个作业。由于作业通过其路径标识,因此所有实例无法使用相同的 url,因此添加了一个虚拟参数。
讨论:这些作业的运行时间比预报的发布时间晚一分钟,这可能不足以弥补双方时钟之间的差异。显然,由英国气象局主动推送会比定时拉取抓取更好。调度程序的时钟以当地时间 (BST) 运行,发布时间也是如此。
(: One cron job per daily forecast, each a minute after the listed update time.
   The dummy "t" parameter gives every job a distinct path. :)
let $login := xmldb:login( "/db", "admin", "admin password" )
let $after-0015 := scheduler:schedule-xquery-cron-job("/db/Wiki/Met/getandsave.xq?t=1" , "0 16 0 * * ?")
let $after-0505 := scheduler:schedule-xquery-cron-job("/db/Wiki/Met/getandsave.xq?t=2" , "0 6 5 * * ?")
let $after-1130 := scheduler:schedule-xquery-cron-job("/db/Wiki/Met/getandsave.xq?t=3" , "0 31 11 * * ?")
let $after-1725 := scheduler:schedule-xquery-cron-job("/db/Wiki/Met/getandsave.xq?t=4" , "0 26 17 * * ?")
return
    ($after-0015, $after-0505, $after-1130, $after-1725)
英国气象局提供一个 可点击的地图,但 KML 地图会更好。海区的 坐标 可以被捕获并手动转换为 XML。
<?xml version="1.0" encoding="UTF-8"?>
<boundaries>
<boundary area="viking">
<point latitude="61" longitude="0"/>
<point latitude="61" longitude="4"/>
<point latitude="58.5" longitude="4"/>
<point latitude="58.5" longitude="0"/>
</boundary>
...
一个区域的边界通过两个函数访问。在这种习惯用法中,一个函数隐藏了文档位置并返回文档的根节点。后续函数使用此基本函数来获取文档,然后应用其他谓词进行过滤。
declare function met:area-boundaries() as element(boundaries) {
    (: The only place that knows where the sea-area boundary document is stored. :)
    let $areas-doc := doc("/db/Wiki/Met/shippingareas.xml")
    return
        $areas-doc/boundaries
};
declare function met:area-boundary($area as xs:string) as element(boundary) {
    (: Select the boundary whose area attribute equals the requested area. :)
    for $candidate in met:area-boundaries()/boundary
    where $candidate/@area = $area
    return $candidate
};
一个区域的中心可以通过对纬度和经度求平均值来粗略计算。
declare function met:area-centre($boundary as element(boundary)) as element(point) {
    (: Rough centre: the mean of the boundary points' latitudes and longitudes,
       each rounded to two decimal places. :)
    let $points := $boundary/point
    let $n := count($points)
    let $lat := round(sum($points/@latitude) div $n * 100) div 100
    let $lon := round(sum($points/@longitude) div $n * 100) div 100
    return
        <point latitude="{$lat}" longitude="{$lon}"/>
};
我们可以从预报中生成一个 kml PlaceMark。
declare function met:forecast-to-kml($forecast as element(forecast)) as element(Placemark) {
    (: Place the marker at the rough centre of the forecast's sea area. :)
    let $area := $forecast/@area
    let $centre := met:area-centre(met:area-boundary($area))
    (: Coordinates are written longitude first, then latitude. :)
    let $lon-lat := string-join(($centre/@longitude, $centre/@latitude), ",")
    return
        <Placemark>
            <name>{ string($forecast/areapresentation) }</name>
            <description>{ met:forecast-as-text($forecast) }</description>
            <Point>
                <coordinates>{ $lon-lat }</coordinates>
            </Point>
        </Placemark>
};
由于我们有区域坐标,我们还可以生成边界作为 kml 中的一条线。
declare function met:sea-area-to-kml(
    $area as xs:string,
    $showname as xs:boolean
) as element(Placemark)
{
    (: One "longitude,latitude,0" triple per boundary point, in document order. :)
    let $triples :=
        for $vertex in met:area-boundary($area)/point
        return string-join(($vertex/@longitude, $vertex/@latitude, "0"), ",")
    return
        <Placemark>
            { if ($showname) then <name>{ $area }</name> else () }
            <LineString>
                <coordinates>{ string-join($triples, " ") }</coordinates>
            </LineString>
        </Placemark>
};
import module namespace met = "http://www.cems.uwe.ac.uk/xmlwiki/met" at "met.xqm";
(: Serialise as XML with the KML media type. :)
declare option exist:serialize "method=xml indent=yes
media-type=application/vnd.google-earth.kml+xml";

(: Set the file name and extension used when saved, to allow GoogleEarth to be invoked. :)
let $header-set := response:set-header('Content-Disposition','inline;filename=shipping.kml;')
(: Get the latest stored forecast.
   NOTE(review): a zero-argument met:get-stored-forecast is not among the functions shown
   alongside this script - presumably supplied by met.xqm; confirm. :)
let $latest := met:get-stored-forecast()
let $title-time := datetime:format-dateTime($latest/@at,"EEEE HH:mm")
return
    <kml >
        <Folder>
            <name>{$title-time} UK Met Office Shipping forecast</name>
            {
                (: Each area contributes a forecast placemark followed by its unnamed boundary line. :)
                for $area-forecast in $latest/forecast
                return
                    (
                        met:forecast-to-kml($area-forecast),
                        met:sea-area-to-kml($area-forecast/@area,false())
                    )
            }
        </Folder>
    </kml>
此数据的另一种用途是提供一个通道,以便在收到预报后立即推送预报。该通道可以是发送给订阅者的短信提醒,也可以是用户可以关注的专用 Twitter 流。
此服务应允许用户请求特定区域或区域的提醒。该应用程序需要
- 一个数据结构来记录订阅者及其区域
- 一个 Web 服务来注册用户、其手机号码和初始区域 [待办]
- 一个短信服务来更改所需的区域并打开或关闭消息传递
- 一个计划任务,在获取新预报时推送短信
<subscriptions>
<subscription>
<username>Fred Bloggs</username>
<password>hafjahfjafa</password>
<mobilenumber>447777777</mobilenumber>
<area>lundy</area>
<status>off</status>
</subscription>
...
</subscriptions>
(待完成)
需要控制对该文档的访问。
第一层访问控制是将文件放在一个无法通过网络访问的集合中。在 UWE 服务器中,根节点(通过 mod-rewrite)是集合 /db/Wiki,因此此目录和子目录中的资源是可访问的,但受文件上的访问设置的约束,但父目录或同级目录中的文件不可访问。因此,此文档存储在 /db/Wiki2 目录中。此文件相对于外部根节点的 URL 是 http://www.cems.uwe.ac.uk/xmlwiki/../Wiki2/shippingsubscriptions.xml,但访问失败。
第二层控制是设置文件的所有者和权限。这是必要的,因为防火墙内的客户端用户可以使用内部服务器地址访问此文件。默认情况下,所有用户(world)都拥有读取和更新权限。取消该访问权限后,脚本必须以所有者或所属组成员的身份登录才能读取该文件。
所有权和权限可以通过 Web 客户端或 eXist xmldb 模块中的函数设置。
此函数接受订阅,制定短信内容,并调用通用 sms:send 函数进行发送。它与我们的短信服务提供商对接。
declare function met:push-sms($subscription as element(subscription)) as element(result) {
    let $area := $subscription/area
    let $number := $subscription/mobilenumber
    let $stored := met:get-stored-forecast($area)
    (: Issue time comes from the @at attribute of the stored forecast's parent element. :)
    let $issued := datetime:format-dateTime($stored/../@at,"EE HH:mm")
    (: Message text: area, issue time, then the abbreviated forecast - URI-encoded before sending. :)
    let $body := concat($area, " ",$issued," ",met:abbreviate(met:forecast-as-text($stored)))
    let $sent := sms:send($number, encode-for-uri($body))
    return
        <result number="{$number}" area="{$area}" sent="{$sent}"/>
};
首先,我们需要获取活动订阅。这些函数遵循用于边界的相同习惯用法
declare function met:subscriptions() {
    (: The only place that knows where the subscription document is stored. :)
    let $subscriptions-doc := doc("/db/Wiki2/shippingsubscriptions.xml")
    return
        $subscriptions-doc/subscriptions
};
declare function met:active-subscriptions() as element(subscription) * {
    (: Only subscriptions whose status is "on" receive messages. :)
    for $entry in met:subscriptions()/subscription
    where $entry/status = "on"
    return $entry
};
然后迭代活动订阅并报告结果。
declare function met:push-subscriptions() as element(results) {
    (: Log in first, then send one SMS per active subscription and collect the results. :)
    let $logged-in := xmldb:login("/db","webuser","password")
    let $outcomes :=
        for $active in met:active-subscriptions()
        return met:push-sms($active)
    return
        element results { $outcomes }
};
此脚本遍历当前活动的订阅,并为每个订阅调用推送短信函数。
import module namespace met = "http://www.cems.uwe.ac.uk/xmlwiki/met" at "met.xqm";
(: Send an SMS for every active subscription and return the collected results. :)
met:push-subscriptions()
此任务可以安排在缓存任务运行后运行,也可以修改缓存脚本,使其在缓存任务完成后调用订阅任务。但是,eXist 也支持触发器,因此该任务也可以由数据库事件触发,该事件在完成预报文件存储时引发。
需要一种消息格式来编辑订阅状态和更改订阅区域。
metsub [ on |off |<area> ]
如果区域发生变化,状态将设置为开启。
该区域根据区域代码列表进行验证。这些代码是从边界数据中提取的。
declare function met:area-names() as xs:string* {
    (: The valid area codes are the area attributes of the stored boundaries. :)
    for $boundary in met:area-boundaries()/boundary
    return string($boundary/@area)
};
import module namespace met = "http://www.cems.uwe.ac.uk/xmlwiki/met" at "met.xqm";
let $login := xmldb:login("/db","user","password")
(: The command is normalised once - whitespace collapsed and lower-cased - and that same value
   is used for matching AND for storing. Previously the area was validated in lower case but
   stored as typed, so "Lundy" was accepted yet never matched the lower-case area keys used
   for forecast lookups; "On"/"OFF" were not recognised either. :)
let $text := lower-case(normalize-space(request:get-parameter("text",())))
let $number := request:get-parameter("from",())
let $subscription := met:get-subscription($number)
return
    if (exists($subscription))
    then
        let $update :=
            if ($text = "on")
            then update replace $subscription/status with <status>on</status>
            else if ($text = "off")
            then update replace $subscription/status with <status>off</status>
            else if ($text = met:area-names())
            then
                (: Changing the area also switches messaging on. :)
                (
                    update replace $subscription/area with <area>{$text}</area>,
                    update replace $subscription/status with <status>on</status>
                )
            else ()  (: unrecognised command: leave the subscription unchanged :)
        return
            (: Re-read the subscription so the reply reflects the state after the update. :)
            let $subscription := met:get-subscription($number)
            return
                concat("Reply: forecast is ",$subscription/status," for area ",$subscription/area)
    else ()
Twitter 具有简单的 REST API 来更新状态。我们可以用它将预报推送到 Twitter 帐户。Twitter 使用基本访问身份验证,并且有一个合适的 XQuery 函数可以将消息发送到用户名/密码,使用 eXist httpclient 模块可以实现。
declare function met:send-tweet ($username as xs:string,$password as xs:string,$tweet as xs:string ) as xs:boolean {
    (: Basic access authentication: "username:password", converted to binary for the header value. :)
    let $credentials := util:string-to-binary(concat($username,":",$password))
    let $request-headers :=
        <headers>
            <header name="Authorization" value="Basic {$credentials}"/>
            <header name="Content-Type" value="application/x-www-form-urlencoded"/>
        </headers>
    (: Form-encoded body carrying the status text. :)
    let $form-body := concat("status=", encode-for-uri($tweet))
    let $endpoint := xs:anyURI("http://twitter.com/statuses/update.xml")
    let $reply := httpclient:post( $endpoint, $form-body, false(), $request-headers )
    return
        (: True only when the response carries status code 200. :)
        $reply/@statusCode='200'
};
需要一个脚本来访问存储的预报并将某个区域的预报推送到 Twitter。可以为每个航运区域设置不同的 Twitter 帐户。该脚本需要安排在获取完整预报后运行。
在本例中,特定区域的预报将推送到硬编码的 Twitter 用户。
import module namespace met = "http://www.cems.uwe.ac.uk/xmlwiki/met" at "met.xqm";
(: Hard-coded Twitter account; the area comes from the "area" request parameter (default: lundy). :)
declare variable $username := "kitwallace";
declare variable $password := "mypassword";
declare variable $area := request:get-parameter("area","lundy");

let $stored := met:get-stored-forecast($area)
(: Issue time comes from the @at attribute of the stored forecast's parent element. :)
let $issued := datetime:format-dateTime($stored/../@at,"HH:mm")
let $summary := met:abbreviate(met:forecast-as-text($stored))
let $tweet := concat($area," at ",$issued,":",$summary)
return
    <result>{met:send-tweet($username,$password,$tweet)}</result>
此任务非常适合使用 XForms。
使用触发器在更新完成后推送短信。