import praw, textwrap, configparser from time import sleep from prawcore import NotFound import datetime as dt import sys import os home = os.path.dirname(os.path.abspath(__file__)) os.chdir(home) reddit = praw.Reddit() HTMLHEAD = f'{home}/htmlhead.html' HTMLTAIL = f'{home}/htmltail.html' NAME = 'HTML' def listing(subs, posts): s = 'Subreddit listing:' i = 0 for sub in subs: s += '
'+str(i+1)+'-'+str(i+posts)+' r/'+sub i += posts return s+'

' def sub_exists(sub): exists = True try: reddit.subreddits.search_by_name(sub, exact=True) except NotFound: exists = False return exists def formatstr(s): try: return s.encode('ascii', errors='ignore').decode() except: return '

ENCODING ERROR

' def bar(amount, total, width, unit=''): left = int(width/total*amount) right = width-left-1 if (left >= width): print('\r['+'='*width+'] '+str(amount)+'/'+str(total)+('' if unit == '' else ' '+unit)+' ('+str(round(amount/total*100, 1))+'%)', end='') return print('\r['+'='*left+'>'+' '*right+'] '+str(amount)+'/'+str(total)+('' if unit == '' else ' '+unit)+' ('+str(round(amount/total*100, 1))+'%)', end='') def dateStr(date): d = str(date.day).zfill(2) m = str(date.month).zfill(2) y = str(date.year).zfill(4) return y+'-'+m+'-'+d print('Subreddit Scrapper by Brendan Westley\n') if len(sys.argv) <= 1: config = configparser.ConfigParser() config.read('quicke.ini') subnames = config[NAME]['subreddits'].split(', ') numitems = int(config[NAME]['posts']) fn = config[NAME]['file'].replace('|D', dateStr(dt.date.today())) elif sys.argv[1] in ['-h', '-?', '/h', '/?']: print('''Downloads posts from reddit and compiles a human readable html file containing them. Usage: subreddits count filename subreddits: comma-space seperated list of subreddits (enclose in double quotes) count: number of posts per subreddit filename: output file name; |D will be replaced with yyyy-mm-dd date (quote filename)''') exit(0) else: subnames = sys.argv[1].split(', ') numitems = int(sys.argv[2]) fn = sys.argv[3].replace('|D', dateStr(dt.date.today())) fn = f'{home}/{fn}' f = open(HTMLHEAD, 'r') head = f.read() f.close() f = open(HTMLTAIL, 'r') tail = f.read() f.close() f = open(fn, 'w') f.write(head+listing(subnames, numitems)+'') f.close() i = 0 for subname in subnames: subreddit = reddit.subreddit(subname) f = open(fn, 'a') print('\nRetreving from', subname) j = 0 for post in subreddit.hot(limit=numitems): bar(j, numitems, 30, 'posts') i += 1 j += 1 try: html = ('