simple scraper addon “regex list exceeds” problem

Hey Guys!

Sers Leute!

I am currently trying to get a very simple addon running with the limited coding knowledge I have.

I took the Plugin chefkoch ( github.com/kodinerds/repo/blob…deo.chefkoch_de-2.0.7.zip ) as an example and rebuilt it so that it scrapes “http://www.multimedia.ethz.ch/speakers/d_arch” and gives me a list of the semester child pages (class="subnav-lev3"). When I click one of these links, I want to get to the list of videos for that semester. I can get as far as choosing the semester, but “def listVideos” appears to have a problem.

I think the first three lines are the ones causing the problem; the Kodi log reports “list exceeds” (list index out of range) errors.

Code:
# Excerpt from the body of listVideos(url), with consistent indentation.
# Either quote style is accepted around the attribute values; the
# backreferences make the closing quote match the opening one.
linkPattern = re.compile(r'''<li class=(["'])video\1><a href=(["'])(.+?)\2''', re.DOTALL)
titlePattern = re.compile('<span>(.+?)<', re.DOTALL)
content = getUrl(url)
tbodyMatch = re.search('<tbody>(.*?)</tbody>', content, re.DOTALL)
# re.search() returns None when the page has no <tbody>; treat that as "no rows".
rows = tbodyMatch.group(1).split('<tr>')[1:] if tbodyMatch else []
for entry in rows:
    linkMatch = linkPattern.search(entry)
    titleMatch = titlePattern.search(entry)
    # Indexing an empty findall() result is what raises "list index out of
    # range"; rows without a video link or a title are skipped instead.
    if linkMatch is None or titleMatch is None:
        continue
    addLink(cleanTitle(titleMatch.group(1)), linkMatch.group(3), 'playVideo', '', '')

the whole default.py-Code.

Code:
#!/usr/bin/python
    # -*- coding: utf-8 -*-
    import re
    import socket
    import sys
    import urllib
    import urllib2

    import xbmc
    import xbmcaddon
    import xbmcgui
    import xbmcplugin
    # Default timeout (in seconds) for every socket opened afterwards.
    socket.setdefaulttimeout(30)
    # Handle taken from the second command-line argument; it is passed to the
    # xbmcplugin calls further down.
    pluginhandle = int(sys.argv[1])
    settings = xbmcaddon.Addon(id='plugin.video.architektur')
    # Shortcut to the addon's localized-string lookup (not called in this file).
    translation = settings.getLocalizedString
    # The setting is compared against the string "true" to obtain a boolean.
    forceViewMode = settings.getSetting("forceViewMode") == "true"
    viewMode = str(settings.getSetting("viewMode"))
    # Page that index() downloads and scans for semester links.
    baseUrl = "http://www.multimedia.ethz.ch/speakers/d_arch/"
    def index():
      content = getUrl(baseUrl)
      spl = content.split('class="subnav-lev3"')
      for i in range(1, len(spl), 1):
            entry = spl[i]
            match = re.compile('href="(.+?)"', re.DOTALL).findall(entry)
            url = match[0]
            match = re.compile('title=".+?">(.+?)<', re.DOTALL).findall(entry)
            title = cleanTitle(match[0])
            addDir(title, url, 'listVideos', '')
      xbmcplugin.endOfDirectory(pluginhandle)
      if forceViewMode:
            xbmc.executebuiltin('Container.SetViewMode('+viewMode+')')
    def listVideos(url):
       content = getUrl(url)
       tbody = re.search('<tbody>(.*?)</tbody>', content, re.DOTALL).group(1)
       spl = tbody.split('<tr>')
       for i in range(1, len(spl), 1):
            entry = spl[i]
            match = re.compile('<li class=\'video\'><a href=\'(.+?)\'', re.DOTALL).findall(entry)
            url = match[0]
            match = re.compile('<span>(.+?)<', re.DOTALL).findall(entry)
            title = cleanTitle(match[0])
            addLink(title, url, 'playVideo', '', '')
       xbmcplugin.endOfDirectory(pluginhandle)
       if forceViewMode:
            xbmc.executebuiltin('Container.SetViewMode('+viewMode+')')
    def playVideo(url):
        listitem = xbmcgui.ListItem(path=url)
        xbmcplugin.setResolvedUrl(pluginhandle, True, listitem)
    def cleanTitle(title):
        title = title.replace("<", "<").replace(">", ">").replace("&", "&").replace("'", "\\").replace(""", "\"").replace("ß", "ß").replace("–", "-")
        title = title.replace("Ä", "Ä").replace("Ü", "Ü").replace("Ö", "Ö").replace("ä", "ä").replace("ü", "ü").replace("ö", "ö")
        title = title.strip()
        return title
    def getUrl(url):
        req = urllib2.Request(url)
        req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20100101 Firefox/22.0')
        response = urllib2.urlopen(req)
        link = response.read()
        response.close()
        return link
    def parameters_string_to_dict(parameters):
        ''' Convert parameters encoded in a URL to a dict. '''
        paramDict = {}
        if parameters:
            paramPairs = parameters[1:].split("&")
            for paramsPair in paramPairs:
                paramSplits = paramsPair.split('=')
                if (len(paramSplits)) == 2:
                    paramDict[paramSplits[0]] = paramSplits[1]
        return paramDict
    def addLink(name, url, mode, iconimage, desc=""):
        u = sys.argv[0]+"?url="+urllib.quote_plus(url)+"&mode="+str(mode)
        ok = True
        liz = xbmcgui.ListItem(name, iconImage="DefaultVideo.png", thumbnailImage=iconimage)
        liz.setInfo(type="Video", infoLabels={"Title": name, "Plot": desc})
        liz.setProperty('IsPlayable', 'true')
        ok = xbmcplugin.addDirectoryItem(handle=int(sys.argv[1]), url=u, listitem=liz)
        return ok
    def addDir(name, url, mode, iconimage):
        u = sys.argv[0]+"?url="+urllib.quote_plus(url)+"&mode="+str(mode)
        ok = True
        liz = xbmcgui.ListItem(name, iconImage="DefaultFolder.png", thumbnailImage=iconimage)
        liz.setInfo(type="Video", infoLabels={"Title": name})
        ok = xbmcplugin.addDirectoryItem(handle=int(sys.argv[1]), url=u, listitem=liz, isFolder=True)
        return ok
    params = parameters_string_to_dict(sys.argv[2])
    mode = urllib.unquote_plus(params.get('mode', ''))
    url = urllib.unquote_plus(params.get('url', ''))
    if mode == 'listVideos':
        listVideos(url)
    elif mode == 'playVideo':
        playVideo(url)
    else:
        index()

Can you tell me what I am doing wrong in the syntax?

———————-

And when I finally get this code working, my next project would be to do the same with this homepage: tube.tugraz.at/engage/ui/brows…e&category=courses&page=1
But there I can't use the source code of the webpage AND I have to go through several pagination pages.

Do you have an idea where to start with this one?

cheers