| @ -0,0 +1,4 @@ | |||||
| credentials.json | |||||
| __pycache__ | |||||
| .vscode | |||||
| docker-compose.yml | |||||
| @ -0,0 +1,21 @@ | |||||
| # WordPress to HFR (forum.hardware.fr) converter | |||||
| ## Still plenty of work to do, really ugly in some places | |||||
| A `flask` route that does the following: | |||||
| - Receive webhooks from WordPress. | |||||
| - Convert the WordPress blogpost's HTML into BBcode. | |||||
| - Post the resulting BBcode to an HFR topic. | |||||
| ## Setup | |||||
| - WordPress must be set up to send the hook `publish_post` with fields `post_content`, `post_name`, and `post_url` to the URL defined by your network setup. Go to [https://<your-blog.wordpress.com>/wp-admin](https://<your-blog.wordpress.com>/wp-admin) `> Settings > Webhooks`. | |||||
| - This is packaged as a Docker container meant to be used together with the [Letsencrypt nginx proxy companion](https://github.com/JrCs/docker-letsencrypt-nginx-proxy-companion). This could easily be adjusted to run on a [GCP Cloud Function](https://cloud.google.com/functions/). WordPress doesn't require `https` so this could also easily run as a standalone service at home without much setup. | |||||
| - Look through source files for extra info. | |||||
| ## What works, what doesn't | |||||
| - Simple HTML tags should all work (note: this is HFR-centric, HFR doesn't support a number of BBcode tags, hence the questionable translation table compared with other BBcode implementations). | |||||
| - The code is ugly in some places and might explode anytime. | |||||
| - Colours are not supported yet. | |||||
| ## Be careful of... | |||||
| - HFR's aggressive anti-spam. While testing, it is probably a good idea to adjust the code so that it does not post at all, or so that it edits an existing post instead of creating a new one. To edit: change `POST` to `/bdd.php` and add a `numreponse` argument to `payload` that refers to a post that belongs to you. | |||||
| @ -0,0 +1,19 @@ | |||||
| version: '3' | |||||
| services: | |||||
| listener: | |||||
| build: ./wordpress_webhook | |||||
| container_name: wordpress_webhook_listener | |||||
| environment: | |||||
| - VIRTUAL_HOST=<your.domain.com> | |||||
| - LETSENCRYPT_HOST=<your.domain.com> | |||||
| - LETSENCRYPT_EMAIL=<your@email.address> | |||||
| expose: | |||||
| - 54321 | |||||
| restart: unless-stopped | |||||
| networks: | |||||
| - letsencrypt_proxy | |||||
| networks: | |||||
| letsencrypt_proxy: | |||||
| external: true | |||||
| @ -0,0 +1,9 @@ | |||||
| FROM python:slim | |||||
| RUN pip install gunicorn flask requests bs4 | |||||
| COPY wp_wh.py converter.py hfr.py credentials.json ./ | |||||
| EXPOSE 54321 | |||||
| ENTRYPOINT ["/usr/local/bin/gunicorn", "-b", ":54321", "wp_wh:app"] | |||||
| @ -0,0 +1,158 @@ | |||||
| """A simple HTML to BBcode converter. Aimed to be used to convert WordPress | |||||
| HTML into `forum.hardware.fr` BBcode. Currently missing support for colours, | |||||
| and everything else might explode anytime. | |||||
| """ | |||||
| import html.parser | |||||
| from collections import namedtuple | |||||
| import types | |||||
# Translation entry for one HTML tag: `tags` is the (opening, closing) BBcode
# pair, `txt_action` is an optional key of `HTMLtoBBcode.text_translate`.
TRtuple = namedtuple('TRtuple',
                     ['tags', 'txt_action'],
                     defaults=[('', ''), None])

# One still-open HTML element on `HTMLtoBBcode.tree`: the HTML tag name, the
# BBcode to emit when it closes, and the optional text action to apply to the
# text it contains.
_OpenTag = namedtuple('_OpenTag', ['tag', 'closing', 'txt_action'])

# HTML void elements: they never get an end tag, so nothing must be left on
# the stack for them.
_VOID_TAGS = frozenset({
    'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link',
    'meta', 'source', 'track', 'wbr',
})


class HTMLtoBBcode(html.parser.HTMLParser):
    """Subclass of `html.parser.HTMLParser` that does the format conversion.

    Call `feed` with a complete HTML document (or fragment) and read the
    resulting BBcode from `output`. Every call to `feed` starts from a clean
    state, so one instance can be reused for several documents.

    Attributes:
        output: BBcode produced by the last call to `feed`.
        tree: stack of `_OpenTag` entries, one per translated HTML element
            that is currently open.
        tag_translate: HTML tag name -> `TRtuple`.
        attr_translate: attribute name -> CSS property -> CSS value ->
            (opening, closing) BBcode pair.
        text_translate: `txt_action` name -> callable applied to text.
        colours: when true, `color:<value>` inline styles are turned into
            `[<value>]...[/<value>]` tags (experimental, off by default).
    """

    def __init__(self, *, colours=False):
        super().__init__()
        self.colours = colours
        self.tree = []
        self.output = ''
        self.tag_translate = {
            'h2': TRtuple(('[b][u]', '[/u][/b]\n'),
                          txt_action='capitalise'),
            'h3': TRtuple(('[b][u]', '[/u][/b]\n'),
                          txt_action='capitalise'),
            'h4': TRtuple(('[b][u]', '[/u][/b]\n'),
                          txt_action='capitalise'),
            'p': TRtuple(('\n', '')),
            'strong': TRtuple(('[b]', '[/b]')),
            'em': TRtuple(('[i]', '[/i]')),
            's': TRtuple(('[strike]', '[/strike]')),
            'blockquote': TRtuple(('[quote]', '[/quote]')),
            'code': TRtuple(('[cpp]', '[/cpp]')),
            'ul': TRtuple(('\n', '')),
            'ol': TRtuple(('\n', '')),
            'li': TRtuple(('[*]', '\n')),
            'a': TRtuple(('[url={href}]', '[/url]')),
            # `img` tags are not always closed, leading to issues,
            # therefore using a single tag for images is simpler
            'img': TRtuple(('[img]{src}[/img]', '')),
            # `span` is here for underlines to be processed
            'span': TRtuple(('', '')),
        }
        self.attr_translate = {
            'style': {'text-decoration': {'underline': ('[u]', '[/u]')},
                      'color': {}},
        }
        self.text_translate = {
            'capitalise': str.upper,
        }

    def _style_tags(self, attrs):
        """Return the (opening, closing) BBcode implied by `attrs`.

        Only attributes listed in `self.attr_translate` are looked at. Their
        value is read as a list of `property: value` declarations separated
        by `;`; unknown properties and values are ignored.
        """
        opening = ''
        closing = ''
        for attr, val in attrs:
            properties = self.attr_translate.get(attr)
            # `val` is None for attributes written without a value
            if not properties or not val:
                continue
            for declaration in val.split(';'):
                property_, _, value = declaration.partition(':')
                known_values = properties.get(property_.strip().lower())
                if known_values is None:
                    continue
                value = value.strip().lower()
                # Colours are registered on the fly, only when enabled
                if property_.strip().lower() == 'color' and self.colours \
                        and value:
                    known_values.setdefault(
                        value,
                        ('[{}]'.format(value), '[/{}]'.format(value)))
                tags = known_values.get(value)
                if tags:
                    opening += tags[0]
                    # Closing tags go in reverse order of the opening ones
                    closing = tags[1] + closing
        return opening, closing

    def handle_starttag(self, tag, attrs):
        """Emit the opening BBcode for `tag` and remember how to close it."""
        translation = self.tag_translate.get(tag)
        # Nothing to do here?
        if translation is None:
            return

        # Attributes without a value are left out so that templates needing
        # them are skipped like templates whose attribute is missing.
        values = {name: value for name, value in attrs if value is not None}
        try:
            opening = translation.tags[0].format_map(values)
            closing = translation.tags[1]
        except KeyError:
            # e.g. `<a>` without `href`: keep the text, drop the BBcode tag
            opening = closing = ''

        style_opening, style_closing = self._style_tags(attrs)
        self.output += opening + style_opening
        closing = style_closing + closing

        if tag in _VOID_TAGS:
            # No end tag will come: close right away, keep the stack clean
            self.output += closing
            return
        self.tree.append(_OpenTag(tag, closing, translation.txt_action))

    def handle_endtag(self, tag):
        """Emit the closing BBcode for `tag`.

        Elements opened after `tag` and never closed are closed first, so the
        BBcode stays balanced. End tags with no matching open element are
        ignored.
        """
        if tag in _VOID_TAGS or tag not in self.tag_translate:
            return
        for index in range(len(self.tree) - 1, -1, -1):
            if self.tree[index].tag == tag:
                break
        else:
            return
        while len(self.tree) > index:
            self.output += self.tree.pop().closing

    def handle_data(self, data):
        """Append text to the output, applying enclosing text actions."""
        # In case the incoming data is just a series of new lines, discard
        # them
        if set(data) == {'\n'}:
            return
        # Every open element with a `txt_action` transforms the text, not
        # just the innermost one, so `<h2><em>x</em></h2>` is upper-cased.
        for open_tag in self.tree:
            if open_tag.txt_action:
                data = self.text_translate[open_tag.txt_action](data)
        self.output += data

    def feed(self, data):
        """Convert `data`, a complete HTML document, into `self.output`."""
        # Start from scratch: forget the previous document entirely
        self.reset()
        self.output = ''
        self.tree = []
        super().feed(data)
        # Flush text the parser is still holding back (e.g. a trailing `&`)
        super().close()
        # Close whatever the HTML left open
        while self.tree:
            self.output += self.tree.pop().closing
| @ -0,0 +1,6 @@ | |||||
| { | |||||
| "user": "", | |||||
| "user_hash": "", | |||||
| "posturl": "", | |||||
| "pre_title": "" | |||||
| } | |||||
| @ -0,0 +1,52 @@ | |||||
| """Provides the `post_HFR` function that posts messages to HFR. | |||||
| You should get those ready: | |||||
| user (str): username | |||||
| user_hash (str): hashlib.md5(<plaintext_password>.encode()).hexdigest() | |||||
| posturl (str): the post URL you are responding to (any post in the | |||||
| topic will do) | |||||
| newpost (str): BBcode of the new post that's being posted | |||||
| Beware of throttling/anti-flood limitations. | |||||
| """ | |||||
| import urllib.parse | |||||
| import requests | |||||
| from bs4 import BeautifulSoup | |||||
BASE = 'https://forum.hardware.fr'
POST = '/bddpost.php'


def _hidden_input(soup, name):
    """Return the `value` of the `<input name=...>` found in `soup`."""
    field = soup.find('input', {'name': name})
    value = None if field is None else field.get('value')
    if value is None:
        raise ValueError(
            'no input named {!r} with a value on the fetched page'
            .format(name))
    return value


def post_HFR(user, user_hash, posturl, newpost, timeout=30):
    """Based on inputs, posts to HFR. Returns the resulting `requests` object.

    Args:
        user: username, sent as the `md_user` cookie and `pseudo` field.
        user_hash: sent as the `md_passs` cookie.
        posturl: URL of a post in the target topic; its query string must
            contain `post` and `cat`.
        newpost: BBcode of the new post.
        timeout: seconds to wait for each HTTP request.

    Raises:
        ValueError: if `posturl` lacks `post`/`cat`, or if the fetched page
            has no `hash_check` or `sujet` input.
        requests.RequestException: on network errors or if fetching
            `posturl` returns an HTTP error status.
    """
    # Only the query string is parsed: feeding the whole URL to `parse_qs`
    # glues scheme/host/path to the first key and the fragment to the last
    # value.
    query = urllib.parse.parse_qs(urllib.parse.urlsplit(posturl).query)
    try:
        post_id = query['post'][0]
        cat_id = query['cat'][0]
    except KeyError as err:
        raise ValueError(
            'posturl must have a {} query parameter'.format(err)) from err

    # The session is closed on exit; the returned response stays readable
    with requests.Session() as session:
        # Cookie preparation. 3 Ss for md_passs, yes.
        session.cookies.update({'md_passs': user_hash,
                                'md_user': user})

        # Retrieve page, we'll need the hash_check from it
        page = session.get(posturl, timeout=timeout)
        page.raise_for_status()
        soup = BeautifulSoup(page.text, 'html.parser')

        payload = {'hash_check': _hidden_input(soup, 'hash_check'),
                   'post': post_id,
                   'cat': cat_id,
                   'verifrequet': 1100,
                   'sujet': _hidden_input(soup, 'sujet'),
                   'content_form': newpost,
                   'pseudo': user}

        return session.post(BASE + POST, data=payload, timeout=timeout)
| @ -0,0 +1,82 @@ | |||||
| """Provides a flask route that receives webhooks from WordPress (expecting | |||||
| `post_content`, `post_name`, and `post_url`), attempts to convert into BBcode, | |||||
| then posts it to a `forum.hardware.fr` topic. | |||||
| HFR credentials are stored in a separate `credentials.json` JSON file. See | |||||
| `hfr.py` for details on how to generate the `user_hash`. `pre_title` can be | |||||
| anything, just add as many `\n`s as you wish at the end of the string. | |||||
| { | |||||
| "user": "", | |||||
| "user_hash": "", | |||||
| "posturl": "", | |||||
| "pre_title": "" | |||||
| } | |||||
| """ | |||||
| import logging | |||||
| import urllib.parse | |||||
| import json | |||||
| from flask import Flask, request | |||||
| from converter import HTMLtoBBcode | |||||
| from hfr import post_HFR | |||||
# Flask application served by gunicorn as `wp_wh:app`
app = Flask(__name__)

CREDENTIALS_FILE = 'credentials.json'

# Module-level converter instance
parser = HTMLtoBBcode()

# Loaded once at import time. JSON text is UTF-8, so do not depend on the
# locale's default encoding (`pre_title` may hold non-ASCII characters).
with open(CREDENTIALS_FILE, 'r', encoding='utf-8') as fp:
    config = json.load(fp)
@app.route('/wordpress_webhook', methods=['POST'])
def webhook_handler():
    """Convert a WordPress webhook into BBcode and post it to HFR.

    The request body is parsed as a URL-encoded form and must contain
    `post_content`, `post_name` and `post_url`. Returns an empty body on
    success, or a 400 response naming the first missing field.
    """
    logger.info('got called, %s', request)

    webhook_data = urllib.parse.parse_qs(request.get_data(as_text=True))
    logger.info(webhook_data)

    # `parse_qs` drops blank values, so an empty field counts as missing
    try:
        post_content = webhook_data['post_content'][0]
        post_name = webhook_data['post_name'][0]
        wordpress_url = webhook_data['post_url'][0]
    except KeyError as err:
        logger.warning('webhook is missing field %s', err)
        return 'missing field {}'.format(err), 400

    # A fresh converter per request: no parser state can leak from one
    # post (or one concurrent request) into another.
    converter = HTMLtoBBcode()
    converter.feed(post_content)
    content = converter.output

    title = post_name.replace('-', ' ')
    title_BB = '[url={url}][b]{title}[/b][/url]\n\n'.format(
        url=wordpress_url,
        title=title)
    post_BB = config['pre_title'] + title_BB + content
    logger.info(post_BB)

    outcome = post_HFR(config['user'],
                       config['user_hash'],
                       config['posturl'],
                       post_BB)
    logger.info(outcome)
    logger.info(outcome.text)

    return ''
def prepare_logger(logger_name=__name__):
    """Return the logger `logger_name`, set to INFO with a stream handler.

    The handler is only attached when the logger has none yet, so calling
    this function several times for the same name does not duplicate every
    log line.
    """
    logger = logging.getLogger(logger_name)
    logger.setLevel(logging.INFO)

    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter(
            '{asctime} {name} {levelname:8s} {message}',
            style='{'))
        logger.addHandler(handler)

    return logger
# Module-level logger used by `webhook_handler`
logger = prepare_logger()

if __name__ == '__main__':
    # Development entry point (the Docker image runs gunicorn instead).
    # Debug mode is deliberately not forced on: the interactive debugger
    # allows arbitrary code execution and this listens on all interfaces.
    # Set FLASK_DEBUG=1 in the environment to enable it explicitly.
    app.run(host='0.0.0.0', port=54321)