[vdr] ANNOUNCEMENT: xmltv2epg

Pjotr Kourzanov peter.kourzanov at xs4all.nl
Wed Jan 10 17:23:21 CET 2007


Oops, forgot the scripts;-)

Pjotr Kourzanov wrote:
> Dear all,
> 
>   Having been dissatisfied with the performance and features of xmltv2vdr
> (too slow, no credits/category information in epg.data), I have created
> a new script (based on AWK) that can be found in the attachment.
> 
>   If you want to try it, put your channels.conf.xmltv in the same folder
> as these two scripts, and then run cat *.xml | ./xmltv2epg > epg.
> 
>   My mileage: 54MB XML -> 18MB EPG in 44 minutes (xmltv2vdr used to
> take 9 hours).
> 
> Pjotr
> 
> 
> _______________________________________________
> vdr mailing list
> vdr at linuxtv.org
> http://www.linuxtv.org/cgi-bin/mailman/listinfo/vdr
> 

-------------- next part --------------
#!/bin/sh
# xmltv2epg: read XMLTV data on stdin and write the converted listing on
# stdout.  The pipeline puts every XML tag on a line of its own, drops blank
# lines, runs xmltv2epg.awk over the result and finally recodes the output
# from $in_charset to $charset.

# external dependencies
date="/bin/date"
sed="/bin/sed"
grep="/bin/grep"
# Kept as a string and run through eval at the end so that the charset
# variables are expanded at the time the command is executed.
recode='/usr/bin/recode -f "$in_charset..$charset"'

# input data (looked up next to this script)
dir=`dirname "$0"`
channels="$dir/channels.conf.xmltv"

# miscellaneous
# sed program: surround every <...> tag with newlines.
htmlnorm='s,<[^<>]*>,\n&\n,g'

in_tz="Europe/Amsterdam"
export in_charset=utf8

export charset=utf8
export LC_ALL=ru_RU.UTF-8
# The time zone is taken from in_tz instead of repeating the literal.
export TZ="$in_tz"

xmltv2epg_awk="$dir/xmltv2epg.awk"

# Notes on the pipeline:
#  * every expansion is quoted so that the sed program (which contains glob
#    characters) and paths containing spaces are passed through intact;
#  * blank lines are matched with [[:space:]]: inside a POSIX bracket
#    expression "\t", "\r" and "\n" are not escapes, so the former
#    "[ \t\r\n]" also discarded text lines made only of the letters t, r, n
#    and backslashes.
"$sed" -e "$htmlnorm" | "$grep" -v '^[[:space:]]*$' | "$xmltv2epg_awk" -vchannels="$channels" -vbindate="$date" | eval "$recode"
-------------- next part --------------
#!/usr/bin/awk -f
# Start-up: set the diagnostics sink, load the channel mapping named by the
# "channels" variable (given with -v on the command line), and define the
# separators used when several text lines are collected for one element.
BEGIN {
	stderr = "/dev/stderr"

	load_channels(channels)

	# Elements without an entry here are joined with append()'s default.
	separator["desc"] = "|"

	separator["category"] = " / "
	separator["title"] = " / "
	separator["display-name"] = " / "

	separator["actor"] = ", "
	separator["director"] = ", "
	separator["presenter"] = ", "
	separator["adapter"] = ", "
}

#{ print >stderr }
#/^<[^ \t\r\n]+/
# Runs for every input line: no specific rule has claimed the line yet.
{ handled=0 }

# <rating system="...">: remember the rating system of the current rating.
# "handled" stays 0 here, so the generic open-tag rule further down still
# sees this line and marks "rating" as an active element.
/^<rating/		{
	# Without a system attribute gensub() would hand back the whole tag
	# line; use an empty value instead, which also discards a system name
	# left over from an earlier rating.
	if ($0 ~ /system="[^"]*"/)
		rating_system=gensub(/.*system="([^"]*)".*/,"\\1","g")
	else
		rating_system=""
}

# <subtitles type="...">: record the subtitle type for the current programme.
/^<subtitles/		{ handled=1
	# Same guard as above: never store the raw tag text.  When the type
	# attribute is absent a plain "subtitles" marker is recorded so the
	# presence of subtitles is still visible in the output.
	if ($0 ~ /type="[^"]*"/)
		data["subtitles"]=gensub(/.*type="([^"]*)".*/,"\\1","g")
	else
		data["subtitles"]="subtitles"
}

# Opening <channel id="..."> tag: remember the id and start collecting a
# fresh display name.
/^<channel/ {
	handled = 1
	id = gensub(/.*id="([^"]*)".*/,"\\1","g")
	data["display-name"] = ""
}

# Closing </channel> tag: store the collected display name.  Channels that
# have an entry in channel_src get their event counter set to 1; all others
# are reported on stderr and left without a counter.
/^<\/channel>/ {
	handled = 1
	names[id] = data["display-name"]
	if (channel_src[id]) {
		events[id] = 1
	} else {
		print "NO SRC for",id > stderr
		next
	}
}

# Opening <programme start="..." stop="..." channel="..."> tag.
#
# Programmes of channels without an event counter are ignored.  For the
# others the start/stop attributes are converted to epoch seconds with the
# external date command (path in "bindate"), a "C" channel header is printed
# before the first event of a channel, and the "E" line (event number, start
# time, duration in seconds) is printed.
#
# Differences from the previous version of this rule:
#  * time zones with a "-" offset or an alphabetic name, and timestamps
#    without any zone, are accepted (only "+..." offsets were matched);
#  * a programme whose start time cannot be parsed or converted is skipped
#    with a message on stderr instead of feeding the raw tag line to the shell
#    and printing an "E" line built from stale values;
#  * the getline loop stops on error (-1) instead of spinning forever.
/^<programme/ 		{ handled=1
	id=gensub(/.*channel="([^"]*)".*/,"\\1","g")
	if (!events[id]) next

	# Forget everything collected for the previous programme.
	delete data
	delete curtag

	start=_xmltv_stamp($0,"start")
	stop=_xmltv_stamp($0,"stop")
	if (start=="") {
		print "BAD start time for",id ":",$0 > stderr
		# No input line and no channels.conf line can contain a newline,
		# so this id never has an event counter and the </programme>
		# rule will skip the programme as well.
		id="\n"
		next
	}
	# Without a usable stop time the event gets the default length below.
	if (stop=="") stop=start

	# start/stop consist only of digits, letters, "+-: " at this point, so
	# they are safe inside the single quotes of the shell command.
	cmd=sprintf("%s -d '%s' +'%%s';"\
		    "%s -d '%s' +'%%s'",
		    bindate,start,
		    bindate,stop)

	delete dates
	i=0
	while ((cmd | getline line) > 0) dates[++i]=line
	close(cmd)
	if (i<2) {
		print "date conversion failed for",id ":",start,"/",stop > stderr
		id="\n"
		next
	}

	# Equal start and stop: assume a length of three hours.
	if (dates[2]==dates[1]) dates[2]=dates[1]+60*60*3

	# First event of this channel: close the previous channel block (if
	# any) and open a new one.
  	if (events[id]==1) {
		if (in_channel) print "c"
		print "C",channel_src[id],names[id]
		in_channel=1
	}

	print "E",events[id],dates[1],dates[2]-dates[1]
}

# Private helper of the <programme> rule: extract the XMLTV timestamp
# attribute "attr" (digits YYYYMMDDhhmm[ss], optionally followed by a time
# zone) from the tag text "line".
# Returns "YYYY-MM-DD hh:mm[:ss][ zone]" for use with "date -d", or "" when
# the attribute is missing or malformed.
function _xmltv_stamp(line, attr,   m, s)
{
	if (!match(line, "[ \t]" attr "=\"([0-9][0-9][0-9][0-9])([0-9][0-9])([0-9][0-9])([0-9][0-9])([0-9][0-9])([0-9][0-9])?( +([-+][0-9:]+|[A-Za-z]+))?\"", m))
		return ""
	s=m[1] "-" m[2] "-" m[3] " " m[4] ":" m[5]
	if (m[6]!="") s=s ":" m[6]
	if (m[8]!="") s=s " " m[8]
	return s
}

# Closing </programme> tag: bump the channel's event counter and print what
# was collected for the programme: optional "T" (title) and "S" (category)
# lines, one "D" line and the terminating "e" line.
/^<\/programme/ {
	handled = 1
	if (!events[id]) next
	events[id]++

	if (data["title"]) print "T",data["title"]
	if (data["category"]) print "S",data["category"]

	# Names of all self-closing tags seen inside this programme.
	ext = ""
	for (t in curtag) ext = append(ext,t)

	# 1st field: sub-title, episode, country, date, premiere.
	ev_head = "D "
	if (data["sub-title"]) ev_head = ev_head data["sub-title"]
	if (data["episode-num"]) {
		if (data["sub-title"]) ev_head = ev_head ", "
		ev_head = ev_head "part " gensub(/ . (.*) . /,"\\1","g",data["episode-num"])
	}
	if (data["country"]) ev_head = ev_head data["country"] " "
	if (data["date"]) ev_head = ev_head data["date"] " "
	if (data["premiere"]) ev_head = ev_head "(premiere in " data["premiere"] ")"

	# 2nd field: the description text.
	ev_desc = ""
	if (data["desc"]) ev_desc = data["desc"]

	# 4th field (the 3rd is a literal "|"): flags and technical details.
	ev_tech = ""
	if (ext) ev_tech = ev_tech ext " "
	if (data["audio"]) ev_tech = ev_tech data["audio"] " "
	if (data["video"]) ev_tech = ev_tech data["video"] " "
	if (data["colour"]=="no") ev_tech = ev_tech " BW "
	if (data["subtitles"]) ev_tech = ev_tech data["subtitles"] " "

	# 5th field: credits.
	ev_crew = ""
	if (data["adapter"]) ev_crew = ev_crew "|Adapter: " data["adapter"]
	if (data["presenter"]) ev_crew = ev_crew "|Presenter: " data["presenter"]
	if (data["director"]) ev_crew = ev_crew "|Director: " data["director"]
	if (data["actor"]) ev_crew = ev_crew "|Actor(s): " data["actor"]
	if (data["writer"]) ev_crew = ev_crew "|Writer(s): " data["writer"]

	# 6th field: ratings.
	ev_rate = ""
	if (data["rating"]) {
		ev_rate = ev_rate "|rating " data["rating"]
		if (rating_system) ev_rate = ev_rate "(" rating_system ")"
	}
	if (data["star-rating"]) ev_rate = ev_rate "|" data["star-rating"]

	# The fields of the "D" line are joined with "|" by way of OFS.
	oOFS = OFS
	OFS = "|"
	print ev_head, ev_desc, "|", ev_tech, ev_crew, ev_rate
	OFS = oOFS

	print "e"
}

# Generic handling for every line no specific rule has claimed.

# XML declarations, DOCTYPE lines and comments ("<?...", "<!...") have no
# closing tag; treating them as opened elements would keep them active for
# the whole run and copy every text line into an entry nobody reads.
!handled && /^<[?!]/ { next }

# Self-closing tag (<name ... />): remember its name as a flag of the current
# programme.  "/" and ">" are excluded from the name so that "<premiere/>"
# yields "premiere" rather than "premiere/".
!handled && /^<[^ \t\r\n]+.*\/>/ {
	tag=gensub(/^<([^ \t\r\n\/>]+).*>/,"\\1","g")
	#print "bool tag:",tag
	curtag[tag]=1
	next
}

# Any other tag: an opening tag marks its element active, a closing tag
# (name starting with "/") deactivates it again.
!handled && /^<[^ \t\r\n]+.*>/ {
	tag=gensub(/^<([^ \t\r\n]+).*>/,"\\1","g")
	if (tag ~ /^\//) {
		tag=substr(tag,2)
		delete active[tag]
	} else
		active[tag]=1
	#tags[tag]=1
	#print "tag:",tag
	next
}

# Text line: append the decoded text to the data of every active element,
# using the element's separator if one is defined.
!handled {
	for (t in active) 
		data[t]=append(data[t],txtconv($0),separator[t])
}

# End of input: close the last channel block.  Nothing is printed when no
# "C" line was ever written, so an empty or unmatched input does not
# produce a stray "c" (same guard as used before each "C" line).
END {
	if (in_channel) print "c"
	#for (t in tags) print t > stderr
}
# load_channels(channels)
#
# Read the colon-separated channel list "channels" and fill two global maps:
#   channel_src[xmltv_id] - "<field4>-<field11>-<field12>-<field10>" of the
#                           line that mentions the id (printed later on the
#                           "C" line)
#   channel_ch[that key]  - space-separated list of ids mapped to it
# Field 14 holds the ids: entries separated by ";", ids within an entry by
# ",".  Entries of the form "vsetv=a,b" append to channel_src, plain entries
# overwrite it.  Lines starting with ":" are skipped.  Every mapping is
# logged to stderr.
#
# Only "channels" is meant to be passed by callers; the remaining
# parameters are local variables.
#
# Changes from the previous version: the read loop ends on a read error
# (getline returning -1 used to loop forever on a missing file), the file is
# closed, lines are split locally instead of changing FS and $0, and no
# helper globals are left behind.
function load_channels(channels, a,i,n,chsrc,b,c,j,m,  line,f,rc,src,spid,npid,tid,esrc)
{
	if (channels == "") {
		print "load_channels: no channels file given" > stderr
		return
	}
	while ((rc = (getline line < channels)) > 0) {
		if (line ~ /^:/) continue
		split(line,f,":")
		src=f[4]; spid=f[10]; npid=f[11]; tid=f[12]; esrc=f[14]
		# Same for every id on this line, so it is built once.
		chsrc=src "-" npid "-" tid "-" spid
		n=split(esrc,a,";")
		for (i=1; i<=n; i++) {
		    if (a[i] ~ /^vsetv=/) {
			split(a[i],b,"=")
			m=split(b[2],c,",")
			for (j=1; j<=m; j++) {
				print "vsetv:",chsrc,c[j] > stderr
				channel_src[c[j]]=append(channel_src[c[j]],chsrc)
				channel_ch[chsrc]=append(channel_ch[chsrc],c[j])
			}
		    } else {
			m=split(a[i],c,",")
			for (j=1; j<=m; j++) {
				print "xmltv:",chsrc,c[j] > stderr
				channel_src[c[j]]=chsrc
				channel_ch[chsrc]=append(channel_ch[chsrc],c[j])
			}
		    }
		}
	}
	if (rc < 0)
		print "load_channels: cannot read",channels > stderr
	close(channels)
}
# append(a, b, c): return "a" followed by "b".  When "a" is non-empty the two
# are joined with separator "c", or with a single space when "c" is empty or
# not given.
function append(a,b,c)
{
	if (!a)
		return a b
	if (!c)
		return a " " b
	return a c b
}
# txtconv(a): return the text "a" with the XML entities known to this script
# replaced by the characters they stand for.
#
# Two fixes over the previous version:
#  * "&gt;" is now decoded (the pattern used to be "&gt;;", which never
#    matches a normal entity);
#  * "&amp;" is decoded last, so an escaped entity such as "&amp;lt;" becomes
#    the text "&lt;" instead of being decoded a second time into "<".
function txtconv(a)
{
    	a=gensub("&quot;","\"","g",a)
    	a=gensub("&apos;","'","g",a)
    	a=gensub("&lt;","<","g",a)
    	a=gensub("&gt;",">","g",a)
    	a=gensub("&#58;",":","g",a)
    	a=gensub("&#246;","ö","g",a)
    	a=gensub("&#146;","'","g",a)
	# "\\&" is a literal ampersand in a gensub() replacement.
    	a=gensub("&amp;","\\&","g",a)
	return a
}


More information about the vdr mailing list