[vdr] ANNOUNCEMENT: xmltv2epg
Pjotr Kourzanov
peter.kourzanov at xs4all.nl
Wed Jan 10 17:23:21 CET 2007
Oops, forgot the scripts;-)
Pjotr Kourzanov wrote:
> Dear all,
>
> Having been dissatisfied with performance and features of xmltv2vdr
> (too slow, no credits/category information in epg.data) I have created
> a new script (based on AWK), that can be found in the attachment.
>
> If you want to try it, put your channels.conf.xmltv in one folder
> with these two scripts, and then run cat *.xml | ./xmltv2epg > epg.
>
> My mileage: 54MB XML -> 18MB EPG in 44 minutes (xmltv2vdr used to
> take 9 hours).
>
> Pjotr
>
>
> _______________________________________________
> vdr mailing list
> vdr at linuxtv.org
> http://www.linuxtv.org/cgi-bin/mailman/listinfo/vdr
>
-------------- next part --------------
#!/bin/sh
# xmltv2epg: filter that reads XMLTV XML on stdin and writes the converted
# EPG text on stdout.  The pipeline is:
#   sed   - put every <...> tag on a line of its own
#   grep  - drop lines that contain only whitespace
#   awk   - xmltv2epg.awk, the actual converter
#   recode - convert from $in_charset to $charset

# external dependencies
date="/bin/date"
sed="/bin/sed"
grep="/bin/grep"
# Kept single-quoted and run through eval at the end of the pipeline, so
# $in_charset and $charset are expanded when the command is executed.
recode='/usr/bin/recode -f "$in_charset..$charset"'

# input data
# "$0" is quoted so an installation path containing spaces still works.
dir=`dirname "$0"`
channels="$dir/channels.conf.xmltv"

# miscellaneous
# Surround every tag with newlines (\n in the replacement is a GNU sed feature).
htmlnorm='s,<[^<>]*>,\n&\n,g'
# NOTE(review): in_tz is not exported and not referenced in this script.
in_tz="Europe/Amsterdam"
export in_charset=utf8
export charset=utf8
export LC_ALL=ru_RU.UTF-8
export TZ="Europe/Amsterdam"
xmltv2epg_awk="$dir/xmltv2epg.awk"

# All expansions are quoted: $htmlnorm contains '[', ']' and '*' and would
# otherwise be subject to pathname expansion.
# The blank-line filter uses [[:space:]]: inside a bracket expression grep
# treats "\t\r\n" as the literal characters backslash, t, r and n, so the
# previous class dropped text lines made only of those letters and let
# tab-only lines through.
"$sed" -e "$htmlnorm" | "$grep" -v '^[[:space:]]*$' | "$xmltv2epg_awk" -vchannels="$channels" -vbindate="$date" | eval "$recode"
-------------- next part --------------
#!/usr/bin/awk -f
# Start-up: set the diagnostics stream, load the channel mapping from the
# file named by the "channels" variable, and define the separator that is
# placed between successive text fragments collected for a given tag.
BEGIN {
	stderr = "/dev/stderr"
	load_channels(channels)
	#curid=""

	# description fragments are joined with "|"
	separator["desc"] = "|"

	# name-like fields are joined with " / "
	separator["category"] = " / "
	separator["title"] = " / "
	separator["display-name"] = " / "

	# credits are joined with ", "
	separator["actor"] = ", "
	separator["director"] = ", "
	separator["presenter"] = ", "
	separator["adapter"] = ", "
}
#{ print >stderr }
#/^<[^ \t\r\n]+/
# Runs for every input record: clear the flag that the tag-specific rules
# set to keep the generic "!handled" rules from processing the same record.
{
	handled = 0
}
# <rating ...>: remember the value of the system="..." attribute for the
# rating line printed at </programme>.  "handled" is not set here, so the
# generic tag rule further down still sees this record.
/^<rating/ {
	rating_system = rating_system_of($0)
}

# Return the value of the system="..." attribute found in REC, or "" when
# there is none.  (gensub() returns its input unchanged when nothing
# matches, which used to put the raw "<rating>" record into the output and
# could not clear a value left over from an earlier programme.)
function rating_system_of(rec,    m)
{
	return match(rec, /system="([^"]*)"/, m) ? m[1] : ""
}
# <subtitles ...>: store the value of the type="..." attribute as the
# programme's "subtitles" datum and mark the record as handled.
/^<subtitles/ { handled=1
	data["subtitles"] = subtitles_type_of($0)
}

# Return the value of the type="..." attribute found in REC, or "" when
# there is none.  (gensub() returns its input unchanged when nothing
# matches, which used to store the raw "<subtitles>" record.)
function subtitles_type_of(rec,    m)
{
	return match(rec, /type="([^"]*)"/, m) ? m[1] : ""
}
# <channel id="...">: start collecting a new channel.  The display name
# gathered so far is cleared and the channel id is taken from the id="..."
# attribute of the current record.
/^<channel/ {
	handled = 1
	data["display-name"] = ""
	id = gensub(/.*id="([^"]*)".*/, "\\1", "g")
}
# </channel>: record the collected display name for the current channel id.
# A channel with an entry in channel_src gets its event counter started at
# 1; any other channel is reported on stderr and gets no counter, so the
# programme rules skip it.
/^<\/channel>/ {
	handled = 1
	names[id] = data["display-name"]
	if (channel_src[id]) {
		events[id] = 1
	} else {
		print "NO SRC for",id > stderr
		next
	}
}
# Return the XMLTV timestamp held in attribute NAME of the current record
# as "YYYY-MM-DD hh:mm:ss +ZZZZ", or "" when the attribute is missing or
# not of the form YYYYMMDDhhmm[ss][ +ZZZZ|-ZZZZ].
#  - The attribute name must be preceded by whitespace, so start= is not
#    confused with pdc-start= or vps-start=.
#  - Both '+' and '-' offsets are accepted (the old pattern "(+[^"]*)"
#    only accepted a leading '+').
#  - A timestamp without an offset is taken as UTC, which is what the
#    XMLTV DTD specifies.
# The result consists of digits, '-', ':', '+' and spaces only, so it can
# be placed inside single quotes on a shell command line.
function programme_time(name,    m, re)
{
	re = "[ \t]" name "=\"([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])([0-9][0-9])([0-9][0-9])([0-9][0-9])?( +([+-][0-9][0-9]:?[0-9][0-9]))?\""
	if (!match($0, re, m)) return ""
	return m[1] "-" m[2] "-" m[3] " " m[4] ":" m[5] ":" (m[6] != "" ? m[6] : "00") " " (m[8] != "" ? m[8] : "+0000")
}

# <programme start=... stop=... channel=...>: open an event.
# Emits the "C" channel header before the first event of a channel and the
# "E" line (event number, start as epoch seconds, duration in seconds).
# Programmes of channels without an event counter are ignored; programmes
# whose times cannot be converted are reported on stderr and skipped.
/^<programme/ { handled=1
	skip_programme = 0
	id=gensub(/.*channel="([^"]*)".*/,"\\1","g")
	if (!events[id]) next
	#if (id!=curid && curid) {
	# curid=id
	#}

	# Times are converted before anything is printed, so a skipped
	# programme leaves no partial output behind.
	start = programme_time("start")
	if (start == "") {
		print "bad or missing start time, programme skipped:",$0 > stderr
		skip_programme = 1
		next
	}
	stop = programme_time("stop")
	# no usable stop time: fall back to start (gives the 3 hour default below)
	if (stop == "") stop = start
	#print id,start,stop

	# Let date(1) turn both timestamps into epoch seconds, one per line.
	cmd=sprintf("%s -d '%s' +'%%s';"\
		"%s -d '%s' +'%%s'",
		bindate,start,
		bindate,stop)
	delete dates	# never reuse the previous programme's values
	i=0
	# "> 0": getline returns -1 on error, which a bare while() treats as true
	while ((cmd | getline line) > 0) dates[++i]=line
	close(cmd)
	if (i < 2) {
		print "cannot convert times, programme skipped:",start,"/",stop > stderr
		skip_programme = 1
		next
	}
	# identical start and stop: assume a duration of three hours
	if (dates[2]==dates[1]) dates[2]=dates[1]+60*60*3

	if (events[id]==1) {
		# first event of this channel: close the previous block, open a new one
		if (in_channel) print "c"
		print "C",channel_src[id],names[id]
		in_channel=1
	}
	print "E",events[id],dates[1],dates[2]-dates[1]
	delete data
	delete curtag
}

# </programme>: close the event opened by the rule above.
# Prints the title (T), category (S) and description (D) lines followed by
# "e", and advances the channel's event counter.  The D line is built from
# six '|'-joined parts: sub-title/episode/country/date/premiere, the
# description, a literal "|", flags and audio/video/subtitle info, the
# credits, and the ratings.
/^<\/programme/ { handled=1
	if (!events[id] || skip_programme) next
	events[id]++
	if(data["title"]) print "T",data["title"]
	if(data["category"]) print "S",data["category"]
	oOFS=OFS; OFS="|"
	# names of all self-closing tags seen inside this programme
	ext=""
	for (t in curtag) ext=append(ext,t)
	print "D "\
		(data["sub-title"] ? data["sub-title"] : "")\
		(data["episode-num"] ? (data["sub-title"] ? ", " : "") "part " gensub(/ . (.*) . /,"\\1","g",data["episode-num"]) : "")\
		(data["country"] ? data["country"] " " : "")\
		(data["date"] ? data["date"] " " : "")\
		(data["premiere"] ? "(premiere in " data["premiere"] ")" : ""),
		(data["desc"] ? data["desc"] : ""),"|",
		(ext ? ext " ": "")\
		(data["audio"] ? data["audio"] " ": "")\
		(data["video"] ? data["video"] " ": "")\
		(data["colour"]=="no" ? " BW " : "")\
		(data["subtitles"] ? data["subtitles"] " ": ""),
		(data["adapter"] ? "|Adapter: " data["adapter"] : "")\
		(data["presenter"] ? "|Presenter: " data["presenter"] : "")\
		(data["director"] ? "|Director: " data["director"] : "")\
		(data["actor"] ? "|Actor(s): " data["actor"] : "")\
		(data["writer"] ? "|Writer(s): " data["writer"] : ""),
		(data["rating"] ? "|rating " data["rating"] (rating_system ? "(" rating_system ")" : "") : "")\
		(data["star-rating"] ? "|" data["star-rating"] : "")
	OFS=oOFS
	print "e"
}
# Self-closing tag (<name ... />) that no earlier rule handled: record the
# tag name in curtag so it can be listed when the programme is printed.
!handled && /^<[^ \t\r\n]+.*\/>/ {
	tag = empty_element_name($0)
	#print "bool tag:",tag
	curtag[tag]=1
	next
}

# Return the element name of the self-closing tag in REC.
# For a tag without attributes (e.g. "<new/>") the name pattern also
# swallows the closing '/', so that trailing slash is removed here;
# previously such tags were recorded as "new/".
function empty_element_name(rec,    name)
{
	name = gensub(/^<([^ \t\r\n]+).*>/, "\\1", "g", rec)
	sub(/\/$/, "", name)
	return name
}
# Opening or closing tag that no earlier rule handled: keep the set of
# currently open elements in "active".  The text rule appends every text
# line to data[] of each element in that set.
!handled && /^<[^ \t\r\n]+.*>/ {
	# <?xml ...?> and <!DOCTYPE ...> / <!-- ... --> have no closing tag.
	# Registering them would leave them in "active" until the end of input,
	# making the text rule append every text line to an entry nobody reads.
	if ($0 ~ /^<[?!]/) next

	tag=gensub(/^<([^ \t\r\n]+).*>/,"\\1","g")
	if (tag ~ /^\//) {
		# closing tag: strip the leading '/' and mark the element closed
		tag=substr(tag,2)
		delete active[tag]
	} else
		active[tag]=1
	#tags[tag]=1
	#print "tag:",tag
	next
}
# Text line (anything the tag rules did not consume): append the decoded
# text to the collected data of every element that is currently open,
# using that element's separator.
!handled {
	# The decoded text does not depend on the element, so convert it once
	# instead of once per open element.
	text = txtconv($0)
	for (t in active)
		data[t]=append(data[t],text,separator[t])
}
# End of input: close the last channel block.  Nothing is printed when no
# "C" line was ever written, so empty input no longer yields a lone "c".
END {
	if (in_channel) print "c"
	#for (t in tags) print t > stderr
}
# load_channels(channels)
#   Read the ':'-separated channel list from the file CHANNELS and fill two
#   global maps:
#     channel_src[xmltv-id] -> "src-npid-tid-spid" built from fields 4, 11,
#                              12 and 10 of the line
#     channel_ch[that key]  -> space-separated list of ids mapped to it
#   Field 14 carries the ids: ';'-separated groups, each a ','-separated
#   list.  A group starting with "vsetv=" adds to channel_src (several
#   sources per id are joined with append()); any other group overwrites
#   channel_src for its ids.  Lines starting with ':' are skipped.
#   Every mapping is echoed on stderr.
#
#   Parameters after CHANNELS are local variables.
#
#   Compared with the earlier version this one
#     - tests getline's result with "> 0" (getline returns -1 for an
#       unreadable file, which made the old loop spin forever),
#     - reads into a local variable and uses split(), leaving $0 and FS
#       untouched and not creating a dozen global variables,
#     - closes the file when done.
function load_channels(channels, a,i,n,chsrc,b,c,j,m,   line,f,rc,src,spid,npid,tid,esrc)
{
	while ((rc = (getline line < channels)) > 0) {
		if (line ~ /^:/) continue

		split(line, f, ":")
		src=f[4]; spid=f[10]; npid=f[11]; tid=f[12]; esrc=f[14]
		# the key is the same for every id on this line
		chsrc=src "-" npid "-" tid "-" spid

		n=split(esrc,a,";")
		for (i=1; i<=n; i++) {
			if (a[i] ~ /^vsetv=/) {
				split(a[i],b,"=")
				m=split(b[2],c,",")
				for (j=1; j<=m; j++) {
					print "vsetv:",chsrc,c[j] > stderr
					channel_src[c[j]]=append(channel_src[c[j]],chsrc)
					channel_ch[chsrc]=append(channel_ch[chsrc],c[j])
				}
			} else {
				m=split(a[i],c,",")
				for (j=1; j<=m; j++) {
					print "xmltv:",chsrc,c[j] > stderr
					#channel_src[c[j]]=append(channel_src[c[j]],chsrc)
					channel_src[c[j]]=chsrc
					channel_ch[chsrc]=append(channel_ch[chsrc],c[j])
				}
			}
		}
	}
	if (rc < 0)
		print "cannot read channel list:",channels > stderr
	else
		close(channels)
}
# append(a, b, c): return A followed by B.  When A is true (in the awk
# sense) the two are joined by C, or by a single space when C is false or
# omitted; when A is false nothing is put between them.
function append(a,b,c)
{
	if (!a)
		return a b
	if (!c)
		c = " "
	return a c b
}
# txtconv(a): return A with XML/HTML character entities replaced by the
# characters they stand for.  Parameters after A are local variables.
#
# Each pattern is assembled as "&" name ";" from the table below, so the
# entity never appears in this file as one literal token.  In the archived
# copy of this function the patterns had been entity-decoded themselves,
# leaving an unterminated string and replacements of a character by itself.
#
# NOTE(review): the replacement characters (" ' < > : o-umlaut ') are taken
# from the archived code; the entity spellings are reconstructed.  "#58"
# for ':' and "rsquo" for the right single quote are assumptions - confirm
# against the feeds in use.
#
# "amp" is handled last on purpose: decoding it first would turn an escaped
# entity (an ampersand entity followed by "lt;") into a second entity that
# is then decoded again.
function txtconv(a,    n, i, name, repl)
{
	n = split("quot apos lt gt #58 ouml rsquo amp", name, " ")
	# "\\&" reaches gensub() as \& , which stands for a literal '&'
	split("\" ' < > : ö ' \\&", repl, " ")
	for (i = 1; i <= n; i++)
		a = gensub("&" name[i] ";", repl[i], "g", a)
	return a
}
More information about the vdr
mailing list