# RSS to POP converter script
#
# This script is for those who want to read news offline and have not yet found
# an appropriate rss client which would also download (part of) the html page referenced.
#
# It requires additional rss_xxx.properties files with the definitions of rss feeds.
# In these files you have to specify the feed and its url, e.g. here the feed yyy:
#    [yyy]
#    url=http://rss.cnn.com/rss/cnn_topstories.rss
# Additionally you should specify the pattern to get the text of the news
# (without navigation, ...), e.g.:
#    text=(<h1>.*?</h1>.*?</div>)
# Otherwise the whole page is downloaded.
# To remove ads or other stuff from the text, use a delete pattern, e.g.:
#    delete=<img[^>]*>
# Text and delete pattern can also be specified before the first feed. They will
# be used unless a feed specifies an own pattern.
#
# Two further optional properties are read the same way (per feed, falling back
# to the section before the first feed):
#    style=...    style text put in front of the extracted news text; the two
#                 characters \n in the value are turned into a line break
#    follow=...   pattern applied to each downloaded page; its first group is the
#                 url of the next page of a multi page article
#
# To use rss feeds in your mail client, use the username
#    xxx-yyy@rss
# where xxx is the property file name (without rss_ and .properties) and [yyy] is the feed.
# If no feed is specified, e.g.
#    xxx@rss
# then all feeds are retrieved.
# You can have multiple feeds downloaded by separating them with a dot:
#    xxx1-yyy1.xxx2-yyy2.xxx3@rss
#
# @Requires: 1.1
# @Version: 0.7.1
#    german texts (with MrPostman 1.2.2+)
# @Version: 0.7
#    possibility to add styles and download multipage news
# @Version: 0.6
#    enhanced help for MrPostman 1.2+
# @Version: 0.5
#    use internal html2text instead of script function
# @Version: 0.4
#    update functionality (only with 1.1 and later)
# @Version: 0.3
#    plain text download
# @Version: 0.2

# Builds the module description: name, author, version and the four
# user-configurable options (descrDir, updateUrl, downloadImages, plain).
# Returns the filled info map.
sub getInfo
    local(info)
    info.name = "RSS feeds"
    info.authors[0] = "Martin Vlcek "
    info.version = "0.7.1"
    info.updateService = ""
    info.documentationlink = ""

    info.options["descrDir"].order = 1
    info.options["descrDir"].value = "rss"
    info.options["descrDir"].label = "Feed Descr. Directory"
    info.options["descrDir"].description = "Directory with the feed descriptions"

    info.options["updateUrl"].order = 2
    info.options["updateUrl"].value = "http://mrpostman.sourceforge.net/scripts.html"
    info.options["updateUrl"].label = "Update URL(s)"
    info.options["updateUrl"].description = "Where to get updated feed descriptions from"

    info.options["downloadImages"].order = 3
    info.options["downloadImages"].value = true
    info.options["downloadImages"].label = "Download images"
    info.options["downloadImages"].description = "Download images as attachments and adjust image links accordingly"

    info.options["plain"].order = 4
    info.options["plain"].value = false
    info.options["plain"].label = "Plain text"
    info.options["plain"].description = "Download as plain text, no images"

    #--- german texts
    # NOTE(review): these assignments overwrite the English labels/descriptions
    # above unconditionally - confirm how MrPostman 1.2.2+ is meant to pick the
    # localized texts.
    info.options["descrDir"].label = "RSS-Beschr.-Ordner"
    info.options["descrDir"].description = "Verzeichnis mit den RSS-Feed-Beschreibungen"
    info.options["updateUrl"].label = "Aktualisierungs-URL(s)"
    info.options["updateUrl"].description = "URL(s) von Webseiten mit aktuellen RSS-Feed-Beschreibungen"
    info.options["downloadImages"].label = "Bilder herunterladen"
    info.options["downloadImages"].description = "Bilder als Anhänge herunterladen und Bilder-Links entsprechend anpassen"
    info.options["plain"].label = "Nur Text"
    info.options["plain"].description = "Nur reinen Text ohne Formatierungen und Bilder herunterladen"

    return info
end

# Returns the html help text shown for this module.
# NOTE(review): the html tags in these literals were lost in this copy of the
# script and have been put back as plain <h1>/<p> markup. The target of the
# "here" link and the code that lists the available feed files (the declared
# locals suggest there was such a loop) could not be recovered - restore them
# from the upstream script.
sub getDocumentationText
    local(d,dir,filename,found,feed,numfound,subfeeds,subfeed)
    dir = info.options["descrDir"].value
    d = "<h1>RSS News Feed Module</h1>\n"
    d = d & "<p>This module provides RSS news feed support for MrPostman.</p>\n"
    d = d & "<p>Detailed information on how to set up your e-mail client for RSS feeds, may be found here.</p>\n"
    d = d & "<p>Currently the following feed files are available (in parentheses the user name you would use in your e-mail client, multiple feeds can be separated by a dot, e.g. cnn_com-topstories.bbc_co_uk-world@rss):</p>\n\n"
    return d
end

# Returns the list of user name extensions handled by this module.
sub getExtensions
    return list("@rss")
end

# Collects the message list for all feeds named in the user name.
#   username - "xxx1-yyy1.xxx2@rss" style name: dot separated entries, each
#              either "file-feed" (one feed) or "file" (all feeds of the file)
#   password - not used
# Fills msgs with one entry per news item and always returns true.
sub loginForReceive(username,password)
    local(found,numfound,feedlist,feeds,feed,feeddir,mainfeed,subfeed,subfeeds,issubfeed,rawfile,properties)
    local(mainproperties,issubfeedxml,rawchannel,from,rawitems,rawitem,msg)
    found,feedlist = match(username,"^(.*)@")
    feeds = split(feedlist,"\.")
    feeddir = info.options["descrDir"].value
    # msgs is deliberately not declared local - presumably the host reads the
    # message list from it; confirm before changing.
    msgs = list()
    foreach(feeds,feed)
        issubfeed,mainfeed,subfeed = match(feed,"^([^-]*)-(.*)$")
        if (issubfeed)
            #-- single feed: rss_<mainfeed>.properties, section [<subfeed>]
            rawfile = readfile(feeddir & "/rss_" & mainfeed & ".properties")
            found,mainproperties = match(rawfile,"\A(.*)(^\[|$)")
            found,properties = match(rawfile,"^\[" & subfeed & "\](.*)(^\[|$)")
            msgs = join(msgs,getMessageList(feed,properties,mainproperties))
        end
        if (!issubfeed)
            #-- whole file: every [section] of rss_<feed>.properties
            rawfile = readfile(feeddir & "/rss_" & feed & ".properties")
            found,mainproperties = match(rawfile,"\A(.*)(^\[|$)")
            numfound,subfeeds = matchall(rawfile,"^\[([^\]]*)\]")
            foreach(subfeeds,subfeed)
                found,properties = match(rawfile,"^\[" & subfeed & "\](.*)(^\[|$)")
                msgs = join(msgs,getMessageList(feed & "_" & subfeed,properties,mainproperties))
            end
        end
    end
    return true
end

# Downloads one feed and turns each of its items into a message stub.
#   feed               - feed name, only used in warnings and as fallback title
#   feedproperties     - text of the feed's own section of the properties file
#   feedmainproperties - text before the first section (defaults for
#                        text/delete/style/follow)
# Returns a list of maps with to, from, subject, link, id, description and the
# txtpattern/delpattern/style/urlpattern later used by receive. The list is
# empty if the feed has no url or cannot be retrieved.
sub getMessageList(feed,feedproperties,feedmainproperties)
    local(found,numfound,status,feedurl,feedxml,feedtitle,feedmsgs,feedmsg)
    local(txtpatternfound,txtpattern,delpatternfound,delpattern)
    local(stylefound,style,urlpatternfound,urlpattern)
    local(rawchannel,rawitems,rawitem)
    feedmsgs = list()
    found,feedurl = match(feedproperties,"^url=([^\\n]*)")
    warningif(!found,"No url found for feed '" & feed & "'")
    if (found)
        #-- each pattern: feed specific value first, file wide default second
        txtpatternfound,txtpattern = match(feedproperties,"^text=([^\\n]*)")
        if (!txtpatternfound)
            txtpatternfound,txtpattern = match(feedmainproperties,"^text=([^\\n]*)")
        end
        warningif(!txtpatternfound,"No text pattern found for feed '" & feed & "'")
        delpatternfound,delpattern = match(feedproperties,"^delete=([^\\n]*)")
        if (!delpatternfound)
            delpatternfound,delpattern = match(feedmainproperties,"^delete=([^\\n]*)")
        end
        stylefound,style = match(feedproperties,"^style=([^\\n]*)")
        if (!stylefound)
            stylefound,style = match(feedmainproperties,"^style=([^\\n]*)")
        end
        urlpatternfound,urlpattern = match(feedproperties,"^follow=([^\\n]*)")
        if (!urlpatternfound)
            urlpatternfound,urlpattern = match(feedmainproperties,"^follow=([^\\n]*)")
        end

        status,feedxml = get(feedurl)
        warningif(status != 200,"Can't retrieve feed '" & feed & "'")
        if (status == 200)
            # The element names in the following patterns had been stripped
            # from this copy (leaving invalid patterns); they are the RSS
            # channel/title/item/link/description elements.
            found,rawchannel = match(feedxml,"<channel(?:>|\s+[^>]*>)(.*?)</channel>",true)
            found,feedtitle = match(rawchannel,"<title(?:>|\s+[^>]*>)(.*?)</title>",true)
            if (!found)
                feedtitle = feed
            end
            numfound,rawitems[] = matchall(feedxml,"<item(?:>|\s+[^>]*>)(.*?)</item>",true)
            foreach(rawitems,rawitem)
                feedmsg = map()
                feedmsg.to = "MrPostman RSS Feed"
                feedmsg.from = feedtitle
                feedmsg.txtpattern = txtpattern
                feedmsg.delpattern = delpattern
                #-- the properties file holds line breaks as the two characters \n
                feedmsg.style = replaceall(style,"\\\\n","\n")
                feedmsg.urlpattern = urlpattern
                found,feedmsg.subject = match(rawitem,"<title(?:>|\s+[^>]*>)(.*?)</title>",true)
                found,feedmsg.link = match(rawitem,"<link(?:>|\s+[^>]*>)(.*?)</link>",true)
                #-- id: the link with every run of other characters replaced by _
                feedmsg.id = replaceall(feedmsg.link,"[^a-zA-Z0-9-]+","_")
                found,feedmsg.description = match(rawitem,"<description(?:>|\s+[^>]*>)(.*?)</description>",true)
                feedmsgs[size(feedmsgs)] = feedmsg
            end
        end
    end
    return feedmsgs
end

# Downloads the page behind a news item and fills in the message.
#   msg - message stub created by getMessageList
# Extracts the news text with msg.txtpattern (whole page if it does not match),
# follows msg.urlpattern for multi page articles, applies msg.delpattern,
# optionally attaches the referenced images, or converts to plain text.
# Returns the completed msg.
sub receive(msg)
    local(status,page,found,numfound,attachments,attachment,ids)
    local(text,urlfound,url,visited)
    status,page = get(msg.link)
    setbaseurl(msg.link)
    msg.date = now()
    msg.charset = "UTF8"
    msg.mimetype = "text/html"
    found,msg.text = match(page,msg.txtpattern,true)
    if (!found)
        warning("News item could not be extracted from page '" & msg.link & "' - getting full page")
        msg.text = page
    end
    if (found)
        #-- follow urls, e.g. for multi page articles
        if (msg.urlpattern != "")
            #-- remember fetched urls: a page whose "next" link points back to
            #-- an already downloaded page must not keep the loop running forever
            visited = list()
            visited[0] = msg.link
            urlfound,url = match(page,msg.urlpattern,true)
            info(urlfound & " " & url)
            while (urlfound && !contains(visited,url))
                visited[size(visited)] = url
                status,page = get(url)
                found,text = match(page,msg.txtpattern,true)
                info(found & " " & text)
                if (found)
                    msg.text = msg.text & "\n" & text
                end
                urlfound,url = match(page,msg.urlpattern,true)
                info(urlfound & " " & url)
            end
        end
        # NOTE(review): the markup in the next two literals was stripped from
        # this copy and has been reconstructed - confirm against the upstream
        # script.
        if (msg.style != "" && !info.options["plain"].value)
            msg.text = "<style type=\"text/css\">" & msg.style & "</style>\n" & msg.text
        end
        msg.text = "<html><body>\n" & msg.text & "\n</body></html>"
    end
    if (msg.delpattern != "")
        msg.text = replaceall(msg.text,msg.delpattern,"",true)
    end
    msg.attachments = list()
    if (info.options["downloadImages"].value && !info.options["plain"].value)
        ids = list()
        numfound,attachments[].link = matchall(msg.text,"<img[^>]*src=[\"']([^\"']*)[\"']",true)
        foreach(attachments,attachment)
            attachment.id = replaceall(attachment.link,"[^a-zA-Z0-9-]+","_")
            #-- attach every image only once, even if it is referenced several times
            if (!contains(ids,attachment.id))
                ids[size(ids)] = attachment.id
                msg.text = replaceall(msg.text,"src=[\"']\Q"&attachment.link&"\E[\"']","src=\"cid:"&attachment.id&"\"",true)
                #-- attachment text: last path segment of the image link
                found,attachment.text = match(attachment.link,"([^/]+)$")
                msg.attachments[size(msg.attachments)] = attachment
            end
        end
    end
    if (info.options["plain"].value)
        msg.mimetype = "text/plain"
        msg.text = html2text(msg.text)
    end
    return msg
end

#-- Former script implementation of html2text, disabled since version 0.5 in
#-- favour of the internal html2text. Kept for reference.
#-- NOTE(review): the comment and closing script/noscript patterns were
#-- stripped from this copy and have been reconstructed.
#sub html2text(text)
#    local(body)
#    found,body = match(text,"<(?i:body)(?:\s+[^>]+|\s*)>(.*)")
#    if (found)
#        text = body
#    end
#    text = replaceall(text,"<!--.*?-->","")
#    text = replaceall(text,"<(?i:script)(?:\\s+[^>]+|\\s*)>(.*?)</(?i:script)>","")
#    text = replaceall(text,"<(?i:noscript)(?:\\s+[^>]+|\\s*)>(.*?)</(?i:noscript)>","")
#    text = replaceall(text,"\\s+"," ")
#    text = replaceall(text,"<(?i:br|td)(\\s+[^>]+|\\s*)>","\n")
#    text = replaceall(text,"<(?i:li)(\\s+[^>]+|\\s*)>","\n\t")
#    text = replaceall(text,"<(?i:p|h\\d|div)(\\s+[^>]+|\\s*)>","\n\n")
#    text = replaceall(text,"<[^'\">]*(\"[^\"]*\"[^'\">]*|'[^']*'[^'\">]*)*>","")
#    text = decode(text,true)
#    text = replaceall(text,"\n\\s+\n","\n\n")
#    text = replaceall(text,"\n +","\n")
#    text = replaceall(text,"\\A\\s+","")
#    return text
#end

# Deleting is not supported for feeds: warns and returns false.
sub delete(msgs)
    warning("You can't delete RSS feed messages!")
    return false
end

# Sending is not supported for feeds: warns and returns false.
sub loginForSend(username,password)
    warning("You can't send RSS feeds!")
    return false
end

# Sending is not supported for feeds: warns and returns false.
sub send(msg)
    warning("You can't send RSS feeds!")
    return false
end

# Updates the feed description files in the descrDir directory.
# Every update url (comma separated option updateUrl) is downloaded and searched
# for links to rss_*.properties files; each linked file is downloaded and
# written to descrDir if it differs from the local copy.
# Returns false if aborted or if an update page cannot be read, true otherwise.
sub update
    local(i,k,urls,url,status,page,numfound,feeds,feed,newdescr,olddescr)
    i = 0
    urls = split(info.options["updateUrl"].value,"\s*,\s*")
    foreach (urls,url)
        setbaseurl(url)
        returnif(isaborted(),false)
        setprogress((i+0.05)/size(urls),"reading update page "&url)
        status,page = get(url)
        if (status != 200)
            error("Invalid update URL for module rss")
            return false
        end
        returnif(isaborted(),false)
        setprogress((i+0.1)/size(urls),"finding feed descr. links on update page "&url)
        #-- first group: link target, second group: file name rss_*.properties
        numfound,feeds[].link,feeds[].filename = matchall(page,"<a[^>]*href=\"([^\"]*/(rss_[^\"]+\.properties))\"")
        k = 0
        foreach (feeds,feed)
            returnif(isaborted(),false)
            setprogress((i+0.2+0.8*k/size(feeds))/size(urls),"checking "&feed.filename&"...")
            status,newdescr = get(feed.link)
            if (status == 200)
                olddescr = readfile(info.options["descrDir"].value&"/"&feed.filename)
                if (olddescr != newdescr)
                    setprogress((i+0.2+0.8*(k+0.5)/size(feeds))/size(urls),"updating "&feed.filename&"...")
                    writefile(info.options["descrDir"].value&"/"&feed.filename,newdescr)
                    addsuccess(feed.filename)
                end
            end
            if (status != 200)
                adderror(feed.filename)
            end
            k = k+1
        end
        i = i+1
    end
    return true
end

sub logout
    #--- nothing to do
end