webif/var/mongoose/html/dedup/normalise.jim

85 lines
1.9 KiB
Plaintext
Raw Normal View History

#!/mod/bin/jimsh
# Regular expressions for leading text that is removed from a title before
# it is normalised.  dedupnormalise applies each one in list order using
# regsub -nocase -all, so matching ignores case.
set dedup_prefixes [list \
	{^new series\.* *} \
	{^cbeebies\.* *} \
	{^cbbc\.* *} \
	{^t4: *} \
	{^brand new series *-* *} \
	{^\.+} \
]
# Rebuild the global seriesmap dictionary from the file $dir/series.info.
#
# seriesmap is always reset to an empty dictionary first, so it is empty
# afterwards when the file is missing or cannot be opened (an open failure
# is reported with puts).
#
# Only lines containing " ==> " are used: the text before the first
# occurrence is the name, the text after it is the prefix.  Lines whose
# prefix is empty are skipped.  Every accepted line is stored under two
# keys: the name exactly as written, and the name lower-cased with all
# whitespace removed.
proc loadseries {dir} {
	global seriesmap

	set seriesmap [dict create]

	set infofile "$dir/series.info"
	if {![file exists $infofile]} {
		return
	}
	if {[catch {set fd [open $infofile "r"]} msg]} {
		puts "Error opening series.info: $msg"
		return
	}

	set separator " ==> "
	set seplen [string length $separator]

	set contents [read $fd]
	foreach entry [split $contents "\n\r"] {
		set at [string first $separator $entry]
		if {$at < 0} {
			continue
		}

		set prefix [string range $entry [expr {$at + $seplen}] end]
		if {$prefix eq ""} {
			continue
		}

		# Key 1: the name exactly as it appears in the file.
		set name [string range $entry 0 [expr {$at - 1}]]
		dict set seriesmap $name $prefix

		# Key 2: lower-cased with whitespace removed, for loose lookups.
		regsub -all -- {[[:space:]]+} [string tolower $name] "" squashed
		dict set seriesmap $squashed $prefix
	}
	$fd close
}
# Reduce a title to a short normalised form and return it.
#
#   title    the text to normalise
#   reserve  optional fallback text used when the normalised title ends up
#            shorter than 6 characters (default: empty, meaning no fallback)
#
# Steps, in order:
#   1. remove every pattern in the global dedup_prefixes (case-insensitive);
#   2. remove the first colon, any spaces before it, and everything after;
#   3. if longer than 40 characters, keep only the text before the first ".";
#   4. if the global seriesmap has an entry for the title (exact, or
#      lower-cased with whitespace removed), prepend "<entry>: ";
#   5. if still shorter than 6 characters and reserve is non-empty, use
#      reserve instead when the title is a literal prefix of it, otherwise
#      append " <reserve>";
#   6. truncate to at most 40 characters.
proc dedupnormalise {title {reserve ""}} {
	global dedup_prefixes seriesmap

	# seriesmap only exists once loadseries has run; fall back to an empty
	# map instead of failing with "can't read seriesmap".
	set map [dict create]
	if {[info exists seriesmap]} {
		set map $seriesmap
	}

	# Strip common prefixes.
	foreach prefix $dedup_prefixes {
		regsub -nocase -all -- $prefix $title "" title
	}

	# Strip anything following a colon.
	regsub -all -- { *[:].*$} $title "" title

	# If the resulting string is longer than 40 characters then
	# split around . and take the left hand side if appropriate.
	if {[string length $title] > 40} {
		lassign [split $title "."] v w
		set title $v
		if {[string length $title] < 6 && [string length $w] < 6} {
			append title "_$w"
		}
	}

	# Prepend the series prefix if one is known for this title.
	if {[dict exists $map $title]} {
		set title "[dict get $map $title]: $title"
	} else {
		# Try lower case without spaces.
		regsub -all -- {[[:space:]]+} [string tolower $title] "" ntitle
		if {[dict exists $map $ntitle]} {
			set title "[dict get $map $ntitle]: $title"
		}
	}

	# If still short, add the reserve string.  An empty reserve is ignored
	# so that the result never gains a trailing space.
	if {[string length $title] < 6 && $reserve ne ""} {
		# Literal prefix comparison: the title is data, not a glob pattern,
		# so characters such as ? * [ must not be interpreted.
		set tlen [string length $title]
		if {[string range $reserve 0 [expr {$tlen - 1}]] eq $title} {
			set title $reserve
		} else {
			append title " $reserve"
		}
	}

	# Shorten if too long.
	if {[string length $title] > 40} {
		set title [string range $title 0 39]
	}
	return $title
}