wattpad [PDF]

book.impl.addCover(fileobj=story_cover). book.impl.description = HTML(story_description, encoding='utf-8') # TODO: not sure if this is HTML or text

20 downloads 41 Views 46KB Size

Recommend Stories


WATTPAD STORIES [PDF]
Aug 31, 2011 - WATTPAD STORIES ... 3W8L: GO TO HELL-- http://www.wattpad.com/7346844-3-words-8-letters-go-to-hell-one-night-stand-with#!p=4. Best friends with benefits ... When Ms. Gangster meets her match -- http://www.wattpad.com/6686850-when-miss-

CeCeLib Stories | Wattpad SoftCopies [PDF]
Jul 26, 2017 - Related Soft Copies : blue_maiden Stories. POSSESSIVE 18: Pierce Rios Muller b... [GFFH Book 2] OFFICIALLY HIS GIRLF... Reyna ng Kamalasan by forgottenglim... .

Wattpad Tagalog Stories Completed Soft Copies Free Download [PDF]
Wattpad Tagalog Stories Completed Soft Copies Free Download - Malignant fibrous histiocytoma information including symptoms diagnosis misdiagnosis throw This video will genealogy Losing A Friend Death Quotes birth.

how to paste on wattpad? | Yahoo Answers [PDF]
Apr 9, 2015 - Highlight chapter one. CTRL + C. Go to Wattpad, click "new part," and paste (CTRL + V) the chapter into the bottom text box. Need any more help I'm "Insanegame27" on Wattpad.

Wattpad Tagalog Stories Completed Soft Copies Free Download [PDF]
Wattpad Tagalog Stories Completed Soft Copies Free Download - Malignant fibrous histiocytoma information including symptoms diagnosis misdiagnosis throw This video will genealogy Losing A Friend Death Quotes birth.

Transmedya Hikayeciliği'nde Wattpad Örneği ve Okur Tercihlerine Yönelik Bir Araştırma Wattpad
Before you speak, let your words pass through three gates: Is it true? Is it necessary? Is it kind?

Transmedya Hikayeciliği'nde Wattpad Örneği ve Okur Tercihlerine Yönelik Bir Araştırma Wattpad
The best time to plant a tree was 20 years ago. The second best time is now. Chinese Proverb

Wattpad Sample International Journal of Education
So many books, so little time. Frank Zappa

How To Select Text In Wattpad Video Download MP4, HD MP4, Full [PDF]
thumb. How to Upload A Story To Wattpad. thumb. How to copy a Wattpad Thread. thumb. Tutorial on How to Download a Story when the Author doesn't give a Soft Copy. thumb. How To Skip The Opening Story on Tap by Wattpad & Get To The Homepage (OLD VERSI

download pdf Creează PDF
You have survived, EVERY SINGLE bad day so far. Anonymous

Idea Transcript


de3sw2aq1 / wattpad-ebook-scraper

master

wattpad-ebook-scraper / scrape.py

de3sw2aq1 Allow passing either story or chapter urls to the scraper

Find file

Copy path

4b7eb57 Nov 11, 2015

2 contributors

Executable File 135 lines (101 sloc) 4.65 KB 1

#!/usr/bin/env python3

2 3

import sys

4

import io

5

import re

6 7

import requests

8

import dateutil.parser

9

from genshi.input import HTML

10

import smartypants

11 12

import ez_epub

13 14

# One shared HTTP session for every request made by this script, set up so
# that requests do not hit the Android download app page.
session = requests.Session()
# Send an empty user agent: Wattpad now blocks all user agents containing
# "Python".
session.headers.update({'User-Agent': ''})

# Story metadata endpoint; the story id is appended to the path.
# Used by the Android app normally.
# Example parameters are what Android provides.
API_STORYINFO = 'https://www.wattpad.com/api/v3/stories/'  # 9876543?drafts=0&include_deleted=1

# Chapter text endpoint.
# Used by the website and the Android app normally.
API_STORYTEXT = 'https://www.wattpad.com/apiv2/storytext'  # ?id=23456789
# Webpage uses a page parameter: ?id=23456789&page=1
# Android uses these parameters: ?id=23456789&increment_read_count=1&include_paragraph_id=1&output=text_zip
# Now (2015-06-15), returns HTML instead of JSON. output=json will get JSON again

# Chapter metadata endpoint.
API_CHAPTERINFO = 'https://www.wattpad.com/apiv2/info'  # ?id=23456789

# Category list endpoint (documented api).
API_GETCATEGORIES = 'https://www.wattpad.com/apiv2/getcategories'

33 34

# Translation table that replaces characters which are unsafe in file names
# with '-'.  It is applied to the story title before the epub is written.
# The table is built from a single string so that the number of source
# characters and replacements can never get out of sync (the two-argument
# form of str.maketrans() raises ValueError when the lengths differ).
# NOTE: the misspelled name is kept because other code refers to it.
ILLEAGAL_FILENAME_CHARACTERS = str.maketrans(dict.fromkeys(r'.<>:"/\|?*^', '-'))

35 36

# Category lookup table, fetched once when the module is loaded (this could
# probably be cached too).  The JSON object's keys arrive as strings, so they
# are converted to ints here.
categories = {
    int(category_id): category_name
    for category_id, category_name in session.get(API_GETCATEGORIES).json().items()
}

39 40

def download_story(story_id):
    """Download a Wattpad story and save it as an epub in the current directory.

    The story metadata is fetched first; then the text of every part that is
    neither a draft nor deleted is downloaded and appended to the book as one
    section per part.  Progress is reported with print().

    story_id -- the story id as a string (it is concatenated onto the API url).

    Returns None.  The output path is './<title>' with characters listed in
    ILLEAGAL_FILENAME_CHARACTERS replaced in the title.
    """
    # TODO: probably use {'drafts': 0, 'include_deleted': 0}
    storyinfo = session.get(API_STORYINFO + story_id, params={'drafts': 1, 'include_deleted': 1}).json()

    story_title = storyinfo['title']
    story_description = storyinfo['description']
    # NOTE: the dates, categories and rating are read but not yet written to
    # the epub.
    story_createDate = dateutil.parser.parse(storyinfo['createDate'])
    story_modifyDate = dateutil.parser.parse(storyinfo['modifyDate'])
    story_author = storyinfo['user']['name']
    story_categories = [categories[c] for c in storyinfo['categories'] if c in categories]  # category can be 0
    story_rating = storyinfo['rating']  # TODO: I think 4 is adult?
    story_cover = io.BytesIO(session.get(storyinfo['cover']).content)
    story_url = storyinfo['url']

    print('Story "{story_title}": {story_id}'.format(story_title=story_title, story_id=story_id))

    # Setup epub
    book = ez_epub.Book()
    book.title = story_title
    book.authors = [story_author]
    book.sections = []
    book.impl.addCover(fileobj=story_cover)
    book.impl.description = HTML(story_description, encoding='utf-8')  # TODO: not sure if this is HTML or text
    book.impl.url = story_url
    book.impl.addMeta('publisher', 'Wattpad - scraped')
    book.impl.addMeta('source', story_url)

    for part in storyinfo['parts']:
        chapter_title = part['title']
        # Read the id before the skip checks: their messages report it, and
        # reading it afterwards raised UnboundLocalError on the first part
        # (or printed the previous part's id on later ones).
        chapter_id = part['id']

        if part['draft']:
            print('Skipping "{chapter_title}": {chapter_id}, part is draft'.format(chapter_title=chapter_title, chapter_id=chapter_id))
            continue

        # The 'deleted' key is not always present.
        if part.get('deleted'):
            print('Skipping "{chapter_title}": {chapter_id}, part is deleted'.format(chapter_title=chapter_title, chapter_id=chapter_id))
            continue

        # TODO: could intelligently only redownload modified parts
        chapter_modifyDate = dateutil.parser.parse(part['modifyDate'])

        print('Downloading "{chapter_title}": {chapter_id}'.format(chapter_title=chapter_title, chapter_id=chapter_id))

        # output=json makes the endpoint return JSON with the chapter markup
        # under 'text'.
        chapter_html = session.get(API_STORYTEXT, params={'id': chapter_id, 'output': 'json'}).json()['text']
        chapter_html = smartypants.smartypants(chapter_html)

        section = ez_epub.Section()
        section.html = HTML(chapter_html, encoding='utf-8')
        section.title = chapter_title
        book.sections.append(section)

    print('Saving epub')
    book.make('./{title}'.format(title=book.title.translate(ILLEAGAL_FILENAME_CHARACTERS)))

96 97 98

def get_story_id(url):
    """Resolve a story or chapter url to a story id.

    The first run of digits in url is taken as a candidate id.  If the story
    info endpoint accepts it, it is returned as the story id.  Otherwise it is
    tried as a chapter id, and the story id is extracted from the url reported
    by the chapter info endpoint.

    url -- a story url, a chapter url, or any string containing the id.

    Returns the story id as a string, or None if no id could be determined.
    """
    # Extract the id number from the url
    match = re.search(r'\d+', url)
    if not match:
        return None

    # Check if it's a valid id of a story
    url_id = match.group()
    if session.get(API_STORYINFO + url_id).ok:
        return url_id

    # If not, check if it's a chapter id and retrieve the story id
    chapterinfo_req = session.get(API_CHAPTERINFO, params={'id': url_id})
    if not chapterinfo_req.ok:
        return None

    # A story url without digits means "not found", like the other failure
    # paths, rather than an AttributeError on a missing match.
    story_match = re.search(r'\d+', chapterinfo_req.json()['url'])
    if not story_match:
        return None
    return story_match.group()

117 118 119

def main():
    """Download every story named on the command line or, failing that, on stdin.

    Each command line argument (or each line of stdin when there are no
    arguments) is treated as a story or chapter url.  Urls that cannot be
    resolved to a story id are reported with print() and skipped.
    """
    # Command line arguments take precedence; otherwise read one url per line.
    story_urls = sys.argv[1:] or sys.stdin

    for story_url in story_urls:
        # Lines read from stdin keep their trailing newline, and blank lines
        # are not urls: normalise the former and skip the latter.
        story_url = story_url.strip()
        if not story_url:
            continue

        story_id = get_story_id(story_url)
        if story_id:
            download_story(story_id)
        else:
            print('ERROR: could not retrieve story', story_url)

131 132 133

# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()

Smile Life

When life gives you a hundred reasons to cry, show life that you have a thousand reasons to smile

Get in touch

© Copyright 2015 - 2024 PDFFOX.COM - All rights reserved.