| @ -0,0 +1,4 @@ | |||
| credentials.json | |||
| __pycache__ | |||
| .vscode | |||
| docker-compose.yml | |||
| @ -0,0 +1,21 @@ | |||
| # WordPress to HFR (forum.hardware.fr) converter | |||
| ## Still plenty of work to do, really ugly in some places | |||
| A `flask` route that does the following: | |||
| - Receive webhooks from WordPress. | |||
| - Convert the WordPress blogpost's HTML into BBcode. | |||
| - Post the resulting BBcode to an HFR topic. | |||
| ## Setup | |||
| - WordPress must be set up to send the hook `publish_post` with fields `post_content`, `post_name`, and `post_url` to the URL defined by your network setup. Go to [https://<your-blog.wordpress.com>/wp-admin](https://<your-blog.wordpress.com>/wp-admin) `> Settings > Webhooks`. | |||
| - This is packaged as a Docker container meant to be used together with the [Letsencrypt nginx proxy companion](https://github.com/JrCs/docker-letsencrypt-nginx-proxy-companion). This could easily be adjusted to run on a [GCP Cloud Function](https://cloud.google.com/functions/). WordPress doesn't require `https` so this could also easily run as a standalone service at home without much setup. | |||
| - Look through source files for extra info. | |||
| ## What works, what doesn't | |||
| - Simple HTML tags should all work (note: this is HFR-centric, HFR doesn't support a number of BBcode tags, hence the questionable translation table compared with other BBcode implementations). | |||
| - The code is ugly in some places and might explode anytime. | |||
| - Colours are not supported yet. | |||
| ## Be careful of... | |||
| - HFR's aggressive anti-spam. It is probably a good idea to adjust the code so that it does not post while you are testing, or so that it edits an existing post instead of creating a new one. To edit: change `POST` to `/bdd.php` and pass a `numreponse` arg to `payload` with a post that belongs to you. | |||
| @ -0,0 +1,19 @@ | |||
| version: '3' | |||
| services: | |||
| listener: | |||
| build: ./wordpress_webhook | |||
| container_name: wordpress_webhook_listener | |||
| environment: | |||
| - VIRTUAL_HOST=<your.domain.com> | |||
| - LETSENCRYPT_HOST=<your.domain.com> | |||
| - LETSENCRYPT_EMAIL=<your@email.address> | |||
| expose: | |||
| - 54321 | |||
| restart: unless-stopped | |||
| networks: | |||
| - letsencrypt_proxy | |||
| networks: | |||
| letsencrypt_proxy: | |||
| external: true | |||
| @ -0,0 +1,9 @@ | |||
# Image for the WordPress webhook listener, served by gunicorn.
FROM python:slim
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir gunicorn flask requests bs4
# Application modules plus the credentials file they read at import time.
COPY wp_wh.py converter.py hfr.py credentials.json ./
EXPOSE 54321
# Serve the Flask object `app` from module `wp_wh` on port 54321.
ENTRYPOINT ["/usr/local/bin/gunicorn", "-b", ":54321", "wp_wh:app"]
| @ -0,0 +1,158 @@ | |||
| """A simple HTML to BBcode converter. Aimed to be used to convert WordPress | |||
| HTML into `forum.hardware.fr` BBcode. Currently missing support for colours, | |||
| and everything else might explode anytime. | |||
| """ | |||
| import html.parser | |||
| from collections import namedtuple | |||
| import types | |||
# Translation record for one HTML tag:
#   tags       -- (opening BBcode template, closing BBcode)
#   txt_action -- optional key into `HTMLtoBBcode.text_translate`
TRtuple = namedtuple('TRtuple',
                     ['tags', 'txt_action'],
                     defaults=[('', ''), None])


class HTMLtoBBcode(html.parser.HTMLParser):
    """Subclass of `html.parser.HTMLParser` that does the format conversion.

    Call `feed()` with a complete HTML fragment, then read the BBcode from
    `self.output`. Every call to `feed()` is an independent conversion.

    Attributes:
        tree: stack of currently open, translated elements. Each entry is a
            `(tag, closing_bbcode, text_action)` tuple; `text_action` is a
            callable applied to the text inside the element, or `None`.
        output: BBcode produced by the latest `feed()` call.
        tag_translate: HTML tag name -> `TRtuple`.
        attr_translate: attribute -> CSS property -> value ->
            `(opening, closing)` BBcode pair.
        text_translate: `txt_action` name -> callable taking and returning
            a `str`.
    """

    # Elements that never have content or a reliable closing tag. They are
    # emitted in one go and never pushed on `self.tree`, so an unclosed
    # `<img>` cannot steal the closing BBcode of its parent.
    VOID_TAGS = frozenset({'img'})

    def __init__(self):
        super().__init__()
        self.tree = []
        self.output = ''
        self.tag_translate = {
            'h2': TRtuple(('[b][u]', '[/u][/b]\n'),
                          txt_action='capitalise'),
            'h3': TRtuple(('[b][u]', '[/u][/b]\n'),
                          txt_action='capitalise'),
            'h4': TRtuple(('[b][u]', '[/u][/b]\n'),
                          txt_action='capitalise'),
            'p': TRtuple(('\n', '')),
            'strong': TRtuple(('[b]', '[/b]')),
            'em': TRtuple(('[i]', '[/i]')),
            's': TRtuple(('[strike]', '[/strike]')),
            'blockquote': TRtuple(('[quote]', '[/quote]')),
            'code': TRtuple(('[cpp]', '[/cpp]')),
            'ul': TRtuple(('\n', '')),
            'ol': TRtuple(('\n', '')),
            'li': TRtuple(('[*]', '\n')),
            'a': TRtuple(('[url={href}]', '[/url]')),
            # `img` tags are not always closed, leading to issues,
            # therefore using a single tag for images is simpler
            'img': TRtuple(('[img]{src}[/img]', '')),
            # `span` is here for underlines to be processed
            'span': TRtuple(('', ''))
        }
        self.attr_translate = {
            # Colours are not converted yet: the `color` table is empty, so
            # colour declarations are ignored until entries are added here.
            'style': {'text-decoration': {'underline': ('[u]', '[/u]')},
                      'color': {}}
        }
        self.text_translate = {
            'capitalise': str.upper
        }

    def _style_tags(self, attrs):
        """Return the `(opening, closing)` pairs for known style attributes.

        Each attribute value is read as `property:value` declarations
        separated by `;`. Anything absent from `self.attr_translate` is
        skipped silently.
        """
        pairs = []
        for attr, val in attrs:
            properties = self.attr_translate.get(attr)
            if not properties or not val:
                continue
            for declaration in val.split(';'):
                property_, _, value = declaration.partition(':')
                known_values = properties.get(property_.strip().lower(), {})
                pair = known_values.get(value.strip().lower())
                if pair:
                    pairs.append(pair)
        return pairs

    def handle_starttag(self, tag, attrs):
        """Emit the opening BBcode for `tag` and remember how to close it."""
        translation = self.tag_translate.get(tag)
        if translation is None:
            return

        opening, closing = translation.tags
        # Valueless or empty attributes are treated as missing.
        values = {name: value for name, value in attrs if value}
        try:
            opening = opening.format_map(values)
        except KeyError:
            # The template needs an attribute the tag does not carry (e.g.
            # `<a>` without `href`): keep the text, drop the markup.
            opening = closing = ''

        if tag in self.VOID_TAGS:
            self.output += opening + closing
            return

        self.output += opening
        # Style-derived BBcode opens after the tag's own BBcode, so it has
        # to close before it. Keeping everything in ONE stack entry
        # guarantees one pop per end tag.
        for style_open, style_close in self._style_tags(attrs):
            self.output += style_open
            closing = style_close + closing

        action = None
        if translation.txt_action:
            action = self.text_translate[translation.txt_action]
        self.tree.append((tag, closing, action))

    def handle_endtag(self, tag):
        """Emit the closing BBcode recorded when `tag` was opened.

        End tags that were never opened are ignored. Translated elements
        left open inside `tag` are closed first so the BBcode stays nested.
        """
        if tag not in self.tag_translate or tag in self.VOID_TAGS:
            return
        if not any(entry[0] == tag for entry in self.tree):
            return
        while self.tree:
            open_tag, closing, _ = self.tree.pop()
            self.output += closing
            if open_tag == tag:
                break

    def handle_data(self, data):
        """Append text to the output, applying enclosing text actions."""
        # In case the incoming data is just a series of new lines, discard
        # them
        if set(data) == {'\n'}:
            return
        # Every open element that carries a text action (e.g. a heading)
        # transforms the text, including text nested in inner tags.
        for _, _, action in self.tree:
            if action is not None:
                data = action(data)
        self.output += data

    def feed(self, data):
        """Convert the HTML fragment `data`; the result is `self.output`.

        Parser state, the open-element stack and the output are all reset
        first, so nothing left over from a previous (possibly malformed)
        document leaks into this one.
        """
        self.reset()
        self.tree = []
        self.output = ''
        super().feed(data)
        # Flush text the parser is still buffering, then close whatever the
        # document left open so the BBcode is balanced.
        self.close()
        while self.tree:
            self.output += self.tree.pop()[1]
| @ -0,0 +1,6 @@ | |||
| { | |||
| "user": "", | |||
| "user_hash": "", | |||
| "posturl": "", | |||
| "pre_title": "" | |||
| } | |||
| @ -0,0 +1,52 @@ | |||
| """Provides the `post_HFR` function that posts messages to HFR. | |||
| You should get those ready: | |||
| user (str): username | |||
| user_hash (str): hashlib.md5(<plaintext_password>.encode()).hexdigest() | |||
| posturl (str): the post URL you are responding to (any post in the | |||
| topic will do) | |||
| newpost (str): BBcode of the new post that's being posted | |||
| Beware of throttling/anti-flood limitations. | |||
| """ | |||
| import urllib.parse | |||
| import requests | |||
| from bs4 import BeautifulSoup | |||
BASE = 'https://forum.hardware.fr'
POST = '/bddpost.php'
# Seconds to wait for each HTTP request before giving up, so a stalled
# connection cannot block the caller forever.
TIMEOUT = 30


def post_HFR(user, user_hash, posturl, newpost):
    """Post `newpost` as a reply in the topic designated by `posturl`.

    Args:
        user (str): username.
        user_hash (str): password hash, see the module docstring.
        posturl (str): URL of any post in the target topic. Its query
            string must contain the `post` and `cat` parameters.
        newpost (str): BBcode of the message to post.

    Returns:
        The `requests` response object of the posting request.

    Raises:
        KeyError: `posturl` has no `post` or `cat` query parameter.
        RuntimeError: the page at `posturl` lacks the `hash_check` or
            `sujet` form field that has to be sent back.
    """
    # Parse only the query string. Feeding the whole URL to `parse_qs`
    # glues the scheme/host/path onto the first parameter name and the
    # `#fragment` onto the last value.
    query = urllib.parse.parse_qs(urllib.parse.urlsplit(posturl).query)
    post_id = query['post'][0]
    category = query['cat'][0]

    # The context manager releases the session's connections on exit.
    with requests.Session() as session:
        # Cookie preparation. 3 Ss for md_passs, yes.
        session.cookies.update({'md_passs': user_hash,
                                'md_user': user})

        # Retrieve page, we'll need the hash_check from it
        page = session.get(posturl, timeout=TIMEOUT)
        soup = BeautifulSoup(page.text, 'html.parser')

        form_values = {}
        for name in ('hash_check', 'sujet'):
            field = soup.find('input', {'name': name})
            if field is None:
                raise RuntimeError(
                    'HFR page has no {!r} form field; check the credentials '
                    'and posturl'.format(name))
            form_values[name] = field['value']

        payload = {'hash_check': form_values['hash_check'],
                   'post': post_id,
                   'cat': category,
                   'verifrequet': 1100,
                   'sujet': form_values['sujet'],
                   'content_form': newpost,
                   'pseudo': user}

        return session.post(BASE + POST, data=payload, timeout=TIMEOUT)
| @ -0,0 +1,82 @@ | |||
| """Provides a flask route that receives webhooks from WordPress (expecting | |||
| `post_content`, `post_name`, and `post_url`), attempts to convert into BBcode, | |||
| then posts it to a `forum.hardware.fr` topic. | |||
| HFR credentials are stored in a separate `credentials.json` JSON file. See | |||
| `hfr.py` for details on how to generate the `user_hash`. `pre_title` can be | |||
| anything, just add as many `\n`s as you wish at the end of the string. | |||
| { | |||
| "user": "", | |||
| "user_hash": "", | |||
| "posturl": "", | |||
| "pre_title": "" | |||
| } | |||
| """ | |||
| import logging | |||
| import urllib.parse | |||
| import json | |||
| from flask import Flask, request | |||
| from converter import HTMLtoBBcode | |||
| from hfr import post_HFR | |||
app = Flask(__name__)
CREDENTIALS_FILE = 'credentials.json'
parser = HTMLtoBBcode()

# JSON text is UTF-8; an explicit encoding keeps non-ASCII values (e.g. in
# `pre_title`) intact whatever the process locale is.
with open(CREDENTIALS_FILE, 'r', encoding='utf-8') as fp:
    config = json.load(fp)
@app.route('/wordpress_webhook', methods=['POST'])
def webhook_handler():
    """Convert the WordPress post from the webhook body and post it to HFR.

    The request body is parsed as a URL-encoded form that must contain
    `post_content`, `post_name` and `post_url`.

    Returns:
        An empty body (HTTP 200) once the post was submitted, or an
        explanatory message with HTTP 400 when a required field is missing.
    """
    logger.info('got called, %s', request)
    webhook_data = urllib.parse.parse_qs(request.get_data(as_text=True))
    logger.info(webhook_data)

    # Reject incomplete webhooks explicitly instead of failing with a
    # KeyError (HTTP 500) further down.
    required = ('post_content', 'post_name', 'post_url')
    missing = [field for field in required if field not in webhook_data]
    if missing:
        logger.warning('webhook rejected, missing fields: %s', missing)
        return 'missing fields: {}'.format(', '.join(missing)), 400

    # A fresh converter per request keeps one post's parser state from
    # leaking into the next.
    converter = HTMLtoBBcode()
    converter.feed(webhook_data['post_content'][0])
    content = converter.output

    title = webhook_data['post_name'][0].replace('-', ' ')
    wordpress_url = webhook_data['post_url'][0]
    title_BB = '[url={url}][b]{title}[/b][/url]\n\n'.format(
        url=wordpress_url,
        title=title)
    post_BB = config['pre_title'] + title_BB + content
    logger.info(post_BB)

    outcome = post_HFR(config['user'],
                       config['user_hash'],
                       config['posturl'],
                       post_BB)
    logger.info(outcome)
    logger.info(outcome.text)
    return ''
def prepare_logger(logger_name=__name__):
    """Return the logger `logger_name`, set to INFO with a stream handler.

    The function is idempotent: the handler is attached only when the
    logger has none yet, so calling it again for the same name does not
    duplicate every log line.

    Args:
        logger_name (str): name passed to `logging.getLogger`.

    Returns:
        logging.Logger: the configured logger.
    """
    log = logging.getLogger(logger_name)
    log.setLevel(logging.INFO)
    if not log.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter(
            '{asctime} {name} {levelname:8s} {message}',
            style='{'))
        log.addHandler(handler)
    return log
logger = prepare_logger()

if __name__ == '__main__':
    # Direct-run entry point (the Docker image starts the app through
    # gunicorn instead). The server listens on every interface, so the
    # interactive debugger must stay off: it would let anyone who can reach
    # the port execute code.
    app.run(host='0.0.0.0', port=54321, debug=False)