Push all old addons

Push of all my old addons that are possibly well broken...
This commit is contained in:
Oli Passey
2017-12-03 19:26:03 +00:00
parent 4b3705e726
commit 671b609c17
57 changed files with 1385 additions and 1 deletion

(binary image file added, 1.5 KiB; contents not shown)


@@ -0,0 +1,141 @@
'''
academicearth.api
~~~~~~~~~~~~~~~~~

This module contains the API classes and methods to parse information from
the Academic Earth website.
'''
from scraper import (get_subjects, get_courses, get_subject_metadata,
                     get_course_metadata, get_lecture_metadata)


class AcademicEarth(object):
    '''The main API object. Useful as a starting point to get available
    subjects.
    '''

    def __init__(self):
        pass

    def get_subjects(self):
        '''Returns a list of subjects available on the website.'''
        return [Subject(**info) for info in get_subjects()]


class Subject(object):
    '''Object representing an Academic Earth subject.'''

    def __init__(self, url, name=None):
        self.url = url
        self._name = name
        self._courses = None
        self._lectures = None
        self._description = None
        self._loaded = False

    @classmethod
    def from_url(cls, url):
        return cls(url=url)

    def __repr__(self):
        return u"<Subject '%s'>" % self.name

    def _load_metadata(self):
        # Metadata is fetched lazily on first property access, then cached.
        resp = get_subject_metadata(self.url)
        if not self._name:
            self._name = resp['name']
        self._courses = [Course(**info) for info in resp['courses']]
        self._lectures = [Lecture(**info) for info in resp['lectures']]
        self._description = resp['description']
        self._loaded = True

    @property
    def name(self):
        '''Subject name'''
        if not self._name:
            self._load_metadata()
        return self._name

    @property
    def description(self):
        '''Subject description'''
        if not self._loaded:
            self._load_metadata()
        return self._description

    @property
    def courses(self):
        '''List of courses available for this subject'''
        if not self._loaded:
            self._load_metadata()
        return self._courses

    @property
    def lectures(self):
        '''List of lectures available for this subject'''
        if not self._loaded:
            self._load_metadata()
        return self._lectures


class Course(object):
    '''Object representing an Academic Earth course.'''

    def __init__(self, url, name=None, **kwargs):
        self.url = url
        self._name = name
        self._loaded = False
        self._lectures = None

    @classmethod
    def from_url(cls, url):
        return cls(url=url)

    def __repr__(self):
        return u"<Course '%s'>" % self.name

    def _load_metadata(self):
        resp = get_course_metadata(self.url)
        if not self._name:
            self._name = resp['name']
        self._lectures = [Lecture(**info) for info in resp['lectures']]
        self._loaded = True

    @property
    def name(self):
        '''Course name'''
        if not self._name:
            self._load_metadata()
        return self._name

    @property
    def lectures(self):
        '''List of lectures available for this course'''
        if not self._loaded:
            self._load_metadata()
        return self._lectures


class Lecture(object):
    '''Object representing an Academic Earth lecture.'''

    def __init__(self, url, name=None, **kwargs):
        self.url = url
        self._name = name
        self._youtube_id = None
        self._loaded = False

    @classmethod
    def from_url(cls, url):
        return cls(url=url)

    def __repr__(self):
        return u"<Lecture '%s'>" % self.name

    def _load_metadata(self):
        resp = get_lecture_metadata(self.url)
        if not self._name:
            self._name = resp['name']
        self._youtube_id = resp['youtube_id']
        self._loaded = True

    @property
    def name(self):
        '''Lecture name'''
        if not self._name:
            self._load_metadata()
        return self._name

    @property
    def youtube_id(self):
        '''YouTube video id for this lecture'''
        if not self._loaded:
            self._load_metadata()
        return self._youtube_id
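
For reference, a minimal, hypothetical usage sketch of the API above. It assumes these files are importable as an academicearth package and that academicearth.org still serves the markup the scraper expects (which, per the commit message, may well no longer hold):

from academicearth.api import AcademicEarth

api = AcademicEarth()
for subject in api.get_subjects():
    # Accessing .name or .courses triggers the lazy _load_metadata() fetch.
    print subject.name
    for course in subject.courses:
        print '  %s -> %s' % (course.name, course.url)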


@@ -0,0 +1,151 @@
'''
academicearth.scraper
~~~~~~~~~~~~~~~~~~~~~

This module contains the functions which do the website scraping for the
API module. You shouldn't have to use this module directly.
'''
import re
from urllib2 import urlopen
from urlparse import urljoin

from BeautifulSoup import BeautifulSoup as BS

BASE_URL = 'http://www.academicearth.org'


def _url(path):
    '''Returns a full url for the given path'''
    return urljoin(BASE_URL, path)


def get(url):
    '''Performs a GET request for the given url and returns the response'''
    conn = urlopen(url)
    resp = conn.read()
    conn.close()
    return resp


def _html(url):
    '''Downloads the resource at the given url and parses it via
    BeautifulSoup.
    '''
    return BS(get(url), convertEntities=BS.HTML_ENTITIES)


def make_showall_url(url):
    '''Takes an API url and appends info to the path to force the page to
    return all entries instead of paginating.
    '''
    if not url.endswith('/'):
        url += '/'
    return url + 'page:1/show:500'


def get_subjects():
    '''Returns a list of subjects for the website. Each subject is a dict
    with keys of 'name' and 'url'.
    '''
    url = _url('subjects')
    html = _html(url)
    # Match anchors whose href starts with '/subjects/' but is longer than
    # the bare prefix; guard against tags that have no href attribute.
    subjs = html.findAll('a', {'href': lambda value: value is not None
                               and value.startswith('/subjects/')
                               and len(value) > len('/subjects/')})

    # subjs will contain some duplicates so we key on url
    items = []
    urls = set()
    for subj in subjs:
        url = _url(subj['href'])
        if url not in urls:
            urls.add(url)
            items.append({
                'name': subj.string,
                'url': url,
            })

    # filter out any items that didn't parse correctly
    return [item for item in items if item['name'] and item['url']]


def get_subject_metadata(subject_url):
    '''Returns metadata for a subject parsed from the given url'''
    html = _html(make_showall_url(subject_url))
    name = get_subject_name(html)
    courses = get_courses(html)
    lectures = get_lectures(html)
    desc = get_subject_description(html)
    return {
        'name': name,
        'courses': courses,
        'lectures': lectures,
        'description': desc,
    }


def get_subject_name(html):
    return html.find('article').h1.text


def get_course_name(html):
    return html.find('section', {'class': 'pagenav'}).span.text


def get_lecture_name(html):
    return html.find('section', {'class': 'pagenav'}).span.text


def get_subject_description(html):
    desc_nodes = html.find('article').findAll('span')
    return '\n'.join(node.text.strip() for node in desc_nodes)


def _get_courses_or_lectures(class_type, html):
    '''class_type can be 'course' or 'lecture'.'''
    nodes = html.findAll('div', {'class': class_type})
    items = [{
        'name': node.h3.text,
        'url': _url(node.a['href']),
        'icon': node.img['src'],
        #'university': '',
        #'speaker': '',
    } for node in nodes]
    return items


def get_lectures(html):
    return _get_courses_or_lectures('lecture', html)


def get_courses(html):
    return _get_courses_or_lectures('course', html)


def get_course_metadata(course_url):
    '''Returns metadata for a course parsed from the given url'''
    html = _html(make_showall_url(course_url))
    lectures = get_lectures(html)
    name = get_course_name(html)
    return {
        'lectures': lectures,
        'name': name,
    }


def get_lecture_metadata(lecture_url):
    '''Returns metadata for a lecture parsed from the given url'''
    html = _html(lecture_url)
    name = get_lecture_name(html)
    youtube_id = parse_youtube_id(html)
    return {
        'name': name,
        'youtube_id': youtube_id,
    }


def parse_youtube_id(html):
    '''Pulls the YouTube video id out of the page's embed src url'''
    embed = html.find('embed')
    yt_ptn = re.compile(r'http://www\.youtube\.com/v/(.+?)\?')
    match = yt_ptn.search(embed['src'])
    if match:
        return match.group(1)
    return None
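
And a hypothetical offline check of parse_youtube_id, feeding it a hand-written snippet instead of a live page (the video id below is made up for illustration):

from BeautifulSoup import BeautifulSoup as BS
from academicearth.scraper import parse_youtube_id

snippet = '<embed src="http://www.youtube.com/v/abc123XYZ?fs=1"></embed>'
print parse_youtube_id(BS(snippet))  # prints: abc123XYZ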