Python - Little Wordcloud
Do you like word clouds?
I do …!
Below is a little script that parses a website and builds a matching word cloud.
Script
# -p: do not fail if the directory already exists; &&: only cd when mkdir succeeded
mkdir -p ~/mywordcloud && cd ~/mywordcloud
cat <<'EOF' > main.py
import fire
import matplotlib.pyplot as plt
import pandas as pd
import re
import requests
from bs4 import BeautifulSoup
from wordcloud import STOPWORDS, WordCloud
def gen_cloud_tag(url: str = "https://blog.stoege.net"):
    """Fetch a web page and display a word cloud built from its text.

    Args:
        url: Address of the page to render. A value without a scheme
            (e.g. ``blog.stoege.net``) is prefixed with ``https://``; an
            explicit ``http://`` or ``https://`` scheme is kept as given.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page contains no text at all.
    """
    # add https only when no scheme was given; prefixing unconditionally
    # would turn "http://host" into the unusable "https://http://host"
    if not url.startswith(("http://", "https://")):
        url = "https://" + url

    # get Webpage
    response = requests.get(url, timeout=5, allow_redirects=True)
    # fail loudly instead of drawing a cloud from an error page's text
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Lowercase the text and collapse every run of whitespace (including
    # newlines) into a single space. Working on the plain string avoids the
    # "['...']" repr artifacts that str() of a DataFrame row would add.
    comment_words = " ".join(soup.get_text().lower().split())
    if not comment_words:
        raise ValueError(f"no text found at {url}")

    # Stop Words
    stopwords = set(STOPWORDS)

    # Build Wordcloud
    wordcloud = WordCloud(
        width=800,
        height=800,
        background_color="white",
        stopwords=stopwords,
        min_font_size=10,
    ).generate(comment_words)

    # Build Image
    plt.figure(figsize=(8, 8), facecolor=None)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.tight_layout(pad=0)

    # show Image
    plt.show()
# Only when executed as a script: hand gen_cloud_tag to python-fire, which
# maps command-line arguments onto the function's parameters.
if __name__ == "__main__":
    fire.Fire(gen_cloud_tag)
EOF
Init Project
You need a few Python libraries. Use a virtual environment such as venv, Poetry, or whatever you prefer.