"""Collect the "domains" named in a Tumblr blog's post captions.

Pages through the Tumblr v2 posts endpoint, takes the first paragraph of
every post caption with its HTML tags stripped, and writes the results to
a file, one per line.
"""
import json
import re

import requests

# Amount the ``offset`` query parameter advances between requests.
PAGE_SIZE = 20

# Seconds to wait on the API before giving up; without a timeout
# ``requests`` may block forever on a stalled connection.
REQUEST_TIMEOUT = 30

API_URL = 'http://api.tumblr.com/v2/blog/plaintextoffenders.com/posts?notes_info=true&api_key=<mykey>&offset={0}'
OUTPUT_PATH = '/tmp/domains'

# Non-greedy match of anything between angle brackets, i.e. one HTML tag.
_TAG_RE = re.compile(r'<.*?>')


def get_captions(content):
    """Return the ``caption`` of every post in a decoded API response.

    ``content`` is the decoded JSON body; posts are read from
    ``content['response']['posts']``. Posts without a ``caption`` key are
    skipped.
    """
    return [post['caption']
            for post in content['response']['posts']
            if 'caption' in post]


def get_domains(captions):
    """Return the tag-stripped first paragraph of each caption.

    Everything before the first ``</p>`` is kept and every ``<...>`` tag in
    that part is removed. A list is returned (rather than a lazy ``map``
    object) so the result can be iterated more than once.
    """
    return [_TAG_RE.sub('', caption.split('</p>')[0]) for caption in captions]


def process(url, timeout=REQUEST_TIMEOUT):
    """Page through ``url`` and return all domains found in the captions.

    ``url`` must contain a ``{0}`` placeholder, which is filled with the
    current post offset. Paging stops when the server answers 404 or when
    a page contains no posts.

    ``timeout`` is the per-request timeout in seconds.

    Raises ``requests.HTTPError`` for any other 4xx/5xx response, instead
    of failing later with an unrelated error while decoding the body.
    """
    offset = 0
    domains = []
    while True:
        resp = requests.get(url.format(offset), timeout=timeout)
        if resp.status_code == 404:
            return domains
        resp.raise_for_status()
        content = json.loads(resp.content)
        # An offset past the last post may yield an empty page rather than
        # a 404; without this check the loop would never terminate.
        if not content['response']['posts']:
            return domains
        domains.extend(get_domains(get_captions(content)))
        offset += PAGE_SIZE


def main():
    """Fetch every domain and write them to ``OUTPUT_PATH``, one per line."""
    domains = process(API_URL)
    with open(OUTPUT_PATH, 'w') as dfile:
        dfile.write('\n'.join(domains))


if __name__ == '__main__':
    main()