To create the wordCloud image in the previous post, I used the following Python script to pull all the data from the posts in my blog.
#import modules
from wordpress_xmlrpc import Client, WordPressPost
from wordpress_xmlrpc.methods.posts import GetPosts
from wordpress_xmlrpc import WordPressTerm
from wordpress_xmlrpc.methods import posts
from os import path
from decryption import load_key, decrypt_message
import re
#define a function to remove html tags from the posts
def remove_html_tags(text):
    """Return ``text`` with every ``<...>`` span removed.

    Each shortest run of characters that starts with ``<`` and ends with
    ``>`` is deleted; all other text, including whitespace and newlines
    outside such spans, is left untouched. A ``<`` with no later ``>`` is
    kept as-is.

    Args:
        text: The string (e.g. raw post HTML) to clean.

    Returns:
        The input string with the matched spans replaced by nothing.
    """
    # re.DOTALL lets "." match newlines, so a tag or HTML comment that wraps
    # onto the next line is still removed as a single unit instead of being
    # left behind in the output.
    return re.sub(r'<.*?>', '', text, flags=re.DOTALL)
# The password is kept encrypted in this script; decrypt_message (imported from
# the local decryption module) is used to recover it at run time.
password = decrypt_message(b'gAAAAAAAABgZSTvBQDHvkG_7AFsWtg8IJXCbMb1jRVtklMQFGpfLTCox0wCAt-ktahUZ-YwlcydU-E2OsZ7uQd7jNZW2lSwQ==')
# Connect to WordPress through its XML-RPC endpoint to fetch the posts.
client = Client('https://kennyl.us/xmlrpc.php', '[Enter your user name here]', password)

# Resolve temp.txt next to this script once, so the write below and the read
# further down always refer to the same file regardless of the current
# working directory.
dirname = path.dirname(__file__)
temp_path = path.join(dirname, 'temp.txt')

offset = 0
increment = 100  # value passed as 'number' (page size) on each GetPosts call

# Open temp.txt a single time, before paging starts. Re-opening it in "w" mode
# for every page would truncate it each time and keep only the last page.
with open(temp_path, "w", encoding="utf-8") as file:
    while True:
        wp_posts = client.call(posts.GetPosts({'number': increment, 'offset': offset}))
        if not wp_posts:
            break  # Break when no more posts returned
        for post in wp_posts:
            # The newlines stop the last word of a title from fusing with the
            # first word of the content (or of the next post) once the HTML
            # tags have been stripped.
            file.write(post.title)
            file.write("\n")
            file.write(post.content)
            file.write("\n")
        offset = offset + increment

# Read temp.txt back and remove the HTML tags; the context manager makes sure
# the file handle is closed.
with open(temp_path, 'r', encoding='utf-8') as file:
    original_text = file.read()
text_without_html = remove_html_tags(original_text)

# Write the cleaned text to wordcloud.txt (relative to the working directory).
with open("wordcloud.txt", "w", encoding="utf-8") as file:
    file.write(text_without_html)
#we then use this wordcloud.txt to generate the wordcloud image with the Python script in my previous post.