Browse Source

new initial commit

master
foormea 6 years ago
commit
30ef187e81
8 changed files with 351 additions and 0 deletions
  1. +4
    -0
      .gitignore
  2. +21
    -0
      README.md
  3. +19
    -0
      dock/docker-compose.yml.template
  4. +9
    -0
      dock/wordpress_webhook/Dockerfile
  5. +158
    -0
      dock/wordpress_webhook/converter.py
  6. +6
    -0
      dock/wordpress_webhook/credentials.json.template
  7. +52
    -0
      dock/wordpress_webhook/hfr.py
  8. +82
    -0
      dock/wordpress_webhook/wp_wh.py

+ 4
- 0
.gitignore View File

@ -0,0 +1,4 @@
credentials.json
__pycache__
.vscode
docker-compose.yml

+ 21
- 0
README.md View File

@ -0,0 +1,21 @@
# WordPress to HFR (forum.hardware.fr) converter
## Still plenty of work to do, really ugly in some places
A `flask` route that does the following:
- Receive webhooks from WordPress.
- Convert the WordPress blogpost's HTML into BBcode.
- Post the resulting BBcode to an HFR topic.
## Setup
- WordPress must be set up to send the hook `publish_post` with fields `post_content`, `post_name`, and `post_url` to the URL defined by your network setup. Go to [https://<your-blog.wordpress.com>/wp-admin](https://<your-blog.wordpress.com>/wp-admin) `> Settings > Webhooks`.
- This is packaged as a Docker container meant to be used together with the [Letsencrypt nginx proxy companion](https://github.com/JrCs/docker-letsencrypt-nginx-proxy-companion). This could easily be adjusted to run on a [GCP Cloud Function](https://cloud.google.com/functions/). WordPress doesn't require `https` so this could also easily run as a standalone service at home without much setup.
- Look through source files for extra info.
## What works, what doesn't
- Simple HTML tags should all work (note: this is HFR-centric, HFR doesn't support a number of BBcode tags, hence the questionable translation table compared with other BBcode implementations).
- The code is ugly in some places and might explode anytime.
- Colours are not supported yet.
## Be careful of...
- HFR's aggressive anti-spam. It is possibly a good idea to adjust the code so that it does not post while you are testing, or so that it edits an existing post instead of creating a new one. To edit: change `POST` to `/bdd.php` and pass a `numreponse` arg to `payload` with a post that belongs to you.

+ 19
- 0
dock/docker-compose.yml.template View File

@ -0,0 +1,19 @@
# Compose template for the webhook listener. Replace the `<...>` placeholders
# before use (presumably after copying this file to `docker-compose.yml`,
# which is the name listed in `.gitignore` -- confirm).
version: '3'
services:
  listener:
    # The image is built from the Dockerfile in ./wordpress_webhook
    build: ./wordpress_webhook
    container_name: wordpress_webhook_listener
    environment:
      # NOTE(review): these look like the variables consumed by the
      # Letsencrypt nginx proxy companion mentioned in the README -- confirm
      # against that project's documentation.
      - VIRTUAL_HOST=<your.domain.com>
      - LETSENCRYPT_HOST=<your.domain.com>
      - LETSENCRYPT_EMAIL=<your@email.address>
    expose:
      # Same port as the Dockerfile's EXPOSE and the gunicorn bind address
      - 54321
    restart: unless-stopped
    networks:
      - letsencrypt_proxy
networks:
  # Declared as external: this file does not create the network
  letsencrypt_proxy:
    external: true

+ 9
- 0
dock/wordpress_webhook/Dockerfile View File

@ -0,0 +1,9 @@
# Image for the WordPress webhook listener, served by gunicorn.
FROM python:slim
# Runtime dependencies: WSGI server, web framework, HTTP client, HTML parser
RUN pip install gunicorn flask requests bs4
# NOTE(review): `credentials.json` is copied into the image, so the image
# itself contains the forum credentials -- treat it as a secret.
COPY wp_wh.py converter.py hfr.py credentials.json ./
# Port gunicorn binds to below
EXPOSE 54321
# Serve the `app` object defined in `wp_wh.py`
ENTRYPOINT ["/usr/local/bin/gunicorn", "-b", ":54321", "wp_wh:app"]

+ 158
- 0
dock/wordpress_webhook/converter.py View File

@ -0,0 +1,158 @@
"""A simple HTML to BBcode converter. Aimed to be used to convert WordPress
HTML into `forum.hardware.fr` BBcode. Currently missing support for colours,
and everything else might explode anytime.
"""
import html.parser
from collections import namedtuple
import types
# Translation entry for one HTML tag:
#   tags       -- (opening BBcode, closing BBcode); the opening part may hold
#                 `{attribute}` placeholders filled from the tag's attributes
#   txt_action -- optional key of `HTMLtoBBcode.text_translate`, applied to
#                 the text found inside the element
TRtuple = namedtuple('TRtuple',
                     ['tags', 'txt_action'],
                     defaults=[('', ''), None])

# One open element on the `HTMLtoBBcode.tree` stack: the HTML tag name, the
# BBcode to emit when the element is closed, and the callable (or None) to
# apply to the text inside it.
_Frame = namedtuple('_Frame', ['tag', 'closing', 'txt_action'])


class HTMLtoBBcode(html.parser.HTMLParser):
    """Subclass of `html.parser.HTMLParser` that does the format conversion.

    Usage: call `feed(html)` with a complete document, then read `output`.
    Every call to `feed` starts from a clean state.

    Attributes:
        output: the BBcode produced by the last call to `feed`.
        tree: stack of the currently open, translated elements (`_Frame`).
        tag_translate: HTML tag name -> `TRtuple`.
        attr_translate: attribute name -> CSS property -> CSS value ->
            (opening BBcode, closing BBcode). Colours have no entry, so they
            are ignored.
        text_translate: `txt_action` name -> callable taking and returning
            a `str`.
    """

    # Elements that never have an end tag. They are written out in one go and
    # never pushed on `tree`; otherwise an unclosed `<img>` would shift every
    # later closing tag by one.
    VOID_TAGS = frozenset({'area', 'base', 'br', 'col', 'embed', 'hr', 'img',
                           'input', 'link', 'meta', 'source', 'track', 'wbr'})

    def __init__(self):
        super().__init__()
        self.tree = []
        self.output = ''
        self.tag_translate = {
            'h2': TRtuple(('[b][u]', '[/u][/b]\n'),
                          txt_action='capitalise'),
            'h3': TRtuple(('[b][u]', '[/u][/b]\n'),
                          txt_action='capitalise'),
            'h4': TRtuple(('[b][u]', '[/u][/b]\n'),
                          txt_action='capitalise'),
            'p': TRtuple(('\n', '')),
            'strong': TRtuple(('[b]', '[/b]')),
            'em': TRtuple(('[i]', '[/i]')),
            's': TRtuple(('[strike]', '[/strike]')),
            'blockquote': TRtuple(('[quote]', '[/quote]')),
            'code': TRtuple(('[cpp]', '[/cpp]')),
            'ul': TRtuple(('\n', '')),
            'ol': TRtuple(('\n', '')),
            'li': TRtuple(('[*]', '\n')),
            'a': TRtuple(('[url={href}]', '[/url]')),
            # `img` tags are not always closed, leading to issues,
            # therefore using a single tag for images is simpler
            'img': TRtuple(('[img]{src}[/img]', '')),
            # `span` is here for underlines to be processed
            'span': TRtuple(('', '')),
        }
        self.attr_translate = {
            'style': {'text-decoration': {'underline': ('[u]', '[/u]')},
                      'color': {}}
        }
        self.text_translate = {
            'capitalise': str.upper
        }

    def _style_markup(self, attr_map):
        """Return the (opening, closing) BBcode pairs for known attributes.

        Each attribute value is read as a list of `property: value`
        declarations separated by `;`. Anything without an entry in
        `self.attr_translate` is silently ignored.
        """
        pairs = []
        for attr, raw in attr_map.items():
            properties = self.attr_translate.get(attr)
            if not properties:
                continue
            for declaration in raw.split(';'):
                property_, _, value = declaration.partition(':')
                known = properties.get(property_.strip().lower(), {})
                pair = known.get(value.strip().lower())
                if pair:
                    pairs.append(pair)
        return pairs

    def handle_starttag(self, tag, attrs):
        """Emit the opening BBcode of `tag` and remember how to close it."""
        translation = self.tag_translate.get(tag)
        # Nothing to do for tags without a translation
        if not translation:
            return
        # Attributes without a value (`<a href>`) are treated as absent
        attr_map = {name: value for name, value in attrs
                    if value is not None}
        try:
            opening = translation.tags[0].format_map(attr_map)
        except KeyError:
            # A required attribute is missing (`<a>` without `href`, `<img>`
            # without `src`): skip the tag, its text is still converted.
            return
        if tag in self.VOID_TAGS:
            self.output += opening + translation.tags[1]
            return
        # Attribute-driven markup (e.g. underline) is nested inside the
        # tag's own markup and closed together with it, so exactly one stack
        # entry exists per open element.
        closing = translation.tags[1]
        for style_open, style_close in self._style_markup(attr_map):
            opening += style_open
            closing = style_close + closing
        txt_action = None
        if translation.txt_action:
            txt_action = self.text_translate[translation.txt_action]
        self.output += opening
        self.tree.append(_Frame(tag, closing, txt_action))

    def handle_endtag(self, tag):
        """Close `tag`, plus any element left open inside it."""
        if tag in self.VOID_TAGS or not self.tag_translate.get(tag):
            return
        for depth in range(len(self.tree) - 1, -1, -1):
            if self.tree[depth].tag == tag:
                break
        else:
            # Stray end tag: nothing was opened, so nothing to close
            return
        while len(self.tree) > depth:
            self.output += self.tree.pop().closing

    def handle_data(self, data):
        """Append text, applying the text action of every open element."""
        # In case the incoming data is just a series of new lines, discard
        # them
        if set(data) == {'\n'}:
            return
        for frame in self.tree:
            if frame.txt_action:
                data = frame.txt_action(data)
        self.output += data

    def feed(self, data):
        """Convert the complete HTML document `data` into `self.output`."""
        # Start from scratch: drop unparsed leftovers, open elements and
        # output of any previous document.
        self.reset()
        self.tree = []
        self.output = ''
        super().feed(data)
        # Flush text the base parser is still buffering (e.g. a trailing
        # `&`), then close whatever the document left open.
        self.close()
        while self.tree:
            self.output += self.tree.pop().closing

+ 6
- 0
dock/wordpress_webhook/credentials.json.template View File

@ -0,0 +1,6 @@
{
"user": "",
"user_hash": "",
"posturl": "",
"pre_title": ""
}

+ 52
- 0
dock/wordpress_webhook/hfr.py View File

@ -0,0 +1,52 @@
"""Provides the `post_HFR` function that posts messages to HFR.
You should get those ready:
user (str): username
user_hash (str): hashlib.md5(<plaintext_password>.encode()).hexdigest()
posturl (str): the post URL you are responding to (any post in the
topic will do)
newpost (str): BBcode of the new post that's being posted
Beware of throttling/anti-flood limitations.
"""
import urllib.parse
import requests
from bs4 import BeautifulSoup
# Root URL of the forum; `POST` is appended to it to build the posting URL.
BASE = 'https://forum.hardware.fr'
# Path of the endpoint that `post_HFR` sends the reply form to.
POST = '/bddpost.php'
def _form_value(soup, name):
    """Return the `value` of the `<input name=...>` field found in `soup`."""
    field = soup.find('input', {'name': name})
    value = field.get('value') if field is not None else None
    if value is None:
        raise ValueError(
            'no usable `{}` input in the page, check the credentials and '
            'the post URL'.format(name))
    return value


def post_HFR(user, user_hash, posturl, newpost, timeout=30):
    """Based on inputs, posts to HFR. Returns the resulting `requests` object.

    Args:
        user (str): username, also sent as the `pseudo` form field.
        user_hash (str): value of the `md_passs` cookie.
        posturl (str): URL of a post in the target topic; its query string
            must contain the `post` and `cat` parameters.
        newpost (str): BBcode of the message to post.
        timeout (float): seconds to wait for each HTTP request.

    Returns:
        The `requests.Response` of the posting request.

    Raises:
        KeyError: `posturl` has no `post` or `cat` query parameter.
        ValueError: the page at `posturl` lacks the `hash_check` or `sujet`
            form field.
    """
    # Only the query component is parsed: feeding the whole URL to
    # `parse_qs` glues the scheme/host/path onto the first parameter name
    # and a trailing `#fragment` onto the last value.
    query = urllib.parse.parse_qs(urllib.parse.urlsplit(posturl).query)
    post_id = query['post'][0]
    category = query['cat'][0]

    # Cookie preparation. 3 Ss for md_passs, yes.
    cookie = {'md_passs': user_hash,
              'md_user': user}

    # The context manager releases the session's connections on exit
    with requests.Session() as session:
        session.cookies.update(cookie)

        # Retrieve page, we'll need the hash_check from it
        page = session.get(posturl, timeout=timeout)
        soup = BeautifulSoup(page.text, 'html.parser')

        payload = {'hash_check': _form_value(soup, 'hash_check'),
                   'post': post_id,
                   'cat': category,
                   'verifrequet': 1100,
                   'sujet': _form_value(soup, 'sujet'),
                   'content_form': newpost,
                   'pseudo': user}

        return session.post(BASE + POST, data=payload, timeout=timeout)

+ 82
- 0
dock/wordpress_webhook/wp_wh.py View File

@ -0,0 +1,82 @@
"""Provides a flask route that receives webhooks from WordPress (expecting
`post_content`, `post_name`, and `post_url`), attempts to convert into BBcode,
then posts it to a `forum.hardware.fr` topic.
HFR credentials are stored in a separate `credentials.json` JSON file. See
`hfr.py` for details on how to generate the `user_hash`. `pre_title` can be
anything, just add as many `\n`s as you wish at the end of the string.
{
"user": "",
"user_hash": "",
"posturl": "",
"pre_title": ""
}
"""
import logging
import urllib.parse
import json
from flask import Flask, request
from converter import HTMLtoBBcode
from hfr import post_HFR
# WSGI application object; the Dockerfile starts gunicorn on `wp_wh:app`.
app = Flask(__name__)

# File holding the `user`, `user_hash`, `posturl` and `pre_title` settings.
CREDENTIALS_FILE = 'credentials.json'

# Module-level HTML to BBcode converter instance.
parser = HTMLtoBBcode()

# JSON text is UTF-8; without an explicit encoding `open` falls back to the
# locale's, which can garble non-ASCII characters in `pre_title`.
with open(CREDENTIALS_FILE, 'r', encoding='utf-8') as fp:
    config = json.load(fp)
@app.route('/wordpress_webhook', methods=['POST'])
def webhook_handler():
    """Convert the blog post carried by a WordPress webhook and post it.

    The request body is parsed as a URL-encoded query string and must hold
    `post_content` (HTML), `post_name` and `post_url`.

    Returns:
        An empty body once the forum request has been sent, or an
        explanatory message with status 400 when a field is missing.
    """
    logger.info('got called, %s', request)

    webhook_data = urllib.parse.parse_qs(request.get_data(as_text=True))
    logger.info(webhook_data)

    # Refuse incomplete payloads instead of failing with a `KeyError`
    missing = [field
               for field in ('post_content', 'post_name', 'post_url')
               if field not in webhook_data]
    if missing:
        message = 'missing field(s): ' + ', '.join(missing)
        logger.warning(message)
        return message, 400

    # A fresh converter per request, so that no parsing state is shared
    # between webhook calls
    converter = HTMLtoBBcode()
    converter.feed(webhook_data['post_content'][0])
    content = converter.output

    # The post name is a slug: turn its dashes back into spaces
    title = webhook_data['post_name'][0].replace('-', ' ')
    wordpress_url = webhook_data['post_url'][0]
    title_BB = '[url={url}][b]{title}[/b][/url]\n\n'.format(
        url=wordpress_url,
        title=title)

    post_BB = config['pre_title'] + title_BB + content
    logger.info(post_BB)

    outcome = post_HFR(config['user'],
                       config['user_hash'],
                       config['posturl'],
                       post_BB)
    logger.info(outcome)
    logger.info(outcome.text)
    return ''
def prepare_logger(logger_name=__name__):
    """Return the logger `logger_name`, set to INFO with a stream handler.

    The handler is only attached when the logger has none yet, so calling
    this function again for the same name does not duplicate every log line.
    """
    log = logging.getLogger(logger_name)
    log.setLevel(logging.INFO)
    if not log.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter(
            '{asctime} {name} {levelname:8s} {message}',
            style='{'))
        log.addHandler(handler)
    return log
# Module-level logger used by `webhook_handler`.
logger = prepare_logger()

if __name__ == '__main__':
    import os

    # The server listens on every interface, and the interactive debugger
    # lets anyone who can reach it run arbitrary code. Debug mode is
    # therefore off unless explicitly requested with FLASK_DEBUG=1.
    app.run(host='0.0.0.0', port=54321,
            debug=os.environ.get('FLASK_DEBUG') == '1')

Loading…
Cancel
Save