Python to go through all the posts from a WordPress Blog

To create the word cloud image in the previous post, I used the following Python script to pull the text of every post on my blog.

#import modules
from wordpress_xmlrpc import Client, WordPressPost
from wordpress_xmlrpc.methods.posts import GetPosts
from wordpress_xmlrpc import WordPressTerm
from wordpress_xmlrpc.methods import posts
from os import path
from decryption import load_key, decrypt_message
import re

#define a function to remove html tags from the posts
def remove_html_tags(text):
    """Return ``text`` with every ``<...>`` span removed.

    Matching is non-greedy, so each tag is removed on its own and the
    text between two tags is kept. This is a plain regex strip, not an
    HTML parser: entities such as ``&amp;`` are left untouched.

    Args:
        text: The string (for example a post body) to strip tags from.

    Returns:
        A new string with the tags replaced by the empty string.
    """
    # re.DOTALL lets '.' match newlines too, so a tag whose attributes are
    # wrapped over several lines is stripped instead of being left behind.
    clean = re.compile(r'<.*?>', re.DOTALL)
    return clean.sub('', text)

#The password is stored encrypted; decrypt_message (imported from the local
#decryption module) turns it back into the value passed to the WordPress client
password = decrypt_message(b'gAAAAAAAABgZSTvBQDHvkG_7AFsWtg8IJXCbMb1jRVtklMQFGpfLTCox0wCAt-ktahUZ-YwlcydU-E2OsZ7uQd7jNZW2lSwQ==')

#connect to wordpress to get my posts
client = Client('https://kennyl.us/xmlrpc.php', '[Enter your user name here]', password)
offset = 0
increment = 100

#temp.txt lives next to this script, so the write below and the read
#further down are guaranteed to use the same file
dirname = path.dirname(__file__)
temp_path = path.join(dirname, 'temp.txt')

#open temp.txt ONCE, before paging, so every page of posts is kept
#(re-opening it in "w" mode for each page truncates the file and leaves
#only the last page of posts in it)
with open(temp_path, "w", encoding="utf-8") as file:
    while True:
        #ask for the next page of `increment` posts, starting at `offset`
        wp_posts = client.call(posts.GetPosts({'number': increment, 'offset': offset}))
        if not wp_posts:
            break # Break when no more posts returned
        for post in wp_posts:
            #newline separators keep the last word of a title from fusing
            #with the first word of the content (and content with the next title)
            file.write(post.title)
            file.write("\n")
            file.write(post.content)
            file.write("\n")
        offset = offset + increment

#read from temp.txt to remove HTML tags
with open(temp_path, 'r', encoding='utf-8') as file:
    original_text = file.read()
text_without_html = remove_html_tags(original_text)

#open wordcloud.txt and write everything from text_without_html into this wordcloud.txt
with open("wordcloud.txt", "w", encoding="utf-8") as file:
    file.write(text_without_html)

#we then use this wordcloud.txt to generate the wordcloud image with the Python script in my previous post.

Share this:

Related

Leave a comment Cancel reply