SeleniumBase/mkdocs_build/prepare.py

""" For preparing the mkdocs-generated seleniumbase.io website. """

import codecs
import os
import re
from pathlib import Path

# Prefix of links that point at files on the GitHub "master" branch. Links
# starting with it are rewritten by process_file() into relative paths.
GITHUB_URL = r"https://github.com/seleniumbase/SeleniumBase/blob/master/"
# Parent of the directory that contains this script; every path handled by
# this module is expressed relative to it.
ROOT_DIR = Path(__file__).parents[1]
# A GitHub URL to a ".md" file, found either inside "(...)" (Markdown link)
# or inside <a href="...">. The prefix is escaped so that its dots only
# match literal dots rather than any character.
URL_PATTERN = re.compile(
    r"(?:\(|<a href=\")(?P<url>{}[\w/.]+\.md)(?:\)|\")".format(
        re.escape(GITHUB_URL)
    )
)
# The target of a Markdown link "[text](some/path.md)".
MD_PATH_PATTERN = re.compile(r"\[.*\]\((?P<path>[\w\\._/]+\.md)\)")
# A Markdown header line. Captures the leading "#"s and the header text
# without an optional leading HTML tag, surrounding "*" emphasis markers
# and a trailing colon.
HEADER_PATTERN = re.compile(
    r"^(?P<level>#+)\s*(<[\w\s=\":/.]+>)?\s*\**(?P<header>.*[\w`]):?\**\s*$",
    flags=re.MULTILINE,
)

# Normalized paths of the files process_file() has already written, used
# to avoid processing the same file twice.
PROCESSED_PATHS = set()


def normalize_path(path):
    """Return ``path`` relative to ROOT_DIR, using "/" as the separator.

    Relative inputs are interpreted relative to ROOT_DIR (the same anchor
    used for all file access in this module) instead of the current working
    directory, so the result does not depend on where the script is run.

    Raises:
        ValueError: if an absolute ``path`` is not located under ROOT_DIR.
    """
    path = Path(path)
    if not path.is_absolute():
        path = ROOT_DIR / path
    return str(path.relative_to(ROOT_DIR)).replace("\\", "/")


def read_file(file_name):
    """Return the text of ``file_name`` (relative to ROOT_DIR) as UTF-8."""
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding (the docs contain non-ASCII characters).
    return (ROOT_DIR / file_name).read_text(encoding="utf-8")


def process_file(file_name):
    """Write a cleaned-up copy of a Markdown file into ``mkdocs_build``.

    The copy has its headers simplified (see HEADER_PATTERN) and its links
    to other ".md" files, both Markdown links and absolute GitHub URLs,
    rewritten as paths relative to the directory of ``file_name``. Every
    linked ".md" file that has not been processed yet is then processed
    recursively.

    Args:
        file_name: path of the Markdown file, relative to ROOT_DIR.
    """
    content = read_file(file_name)
    # Collect the GitHub URLs up front, before the content is rewritten.
    urls = URL_PATTERN.findall(content)
    content = HEADER_PATTERN.sub(r"\g<level> \g<header>", content)
    directory = "/".join(normalize_path(file_name).split("/")[:-1])

    paths = set()

    for md_path in MD_PATH_PATTERN.findall(content):
        path = md_path.lstrip("/")
        # Prefer a target next to the current file; otherwise treat the
        # link as relative to the repository root.
        if (ROOT_DIR / directory / path).exists():
            path = ROOT_DIR / directory / path
        else:
            path = ROOT_DIR / path
        target = normalize_path(path.resolve().relative_to(ROOT_DIR))
        paths.add(target)
        # Keep the parentheses so the text stays a valid Markdown link,
        # and make the target relative to this file's directory, exactly
        # like the GitHub URL rewrite below.
        relative_target = normalize_path(os.path.relpath(target, directory))
        content = content.replace(
            "(" + md_path + ")", "(" + relative_target + ")"
        )

    for url in urls:
        path = url[len(GITHUB_URL) :]  # noqa: E203
        paths.add(path)
        content = content.replace(
            url, normalize_path(os.path.relpath(path, directory))
        )

    output_path = ROOT_DIR / "mkdocs_build" / file_name
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # main() reads these files back as UTF-8, so write them as UTF-8 too.
    output_path.write_text(content, encoding="utf-8")
    # Register the file before recursing so that files linking to each
    # other do not cause endless recursion.
    PROCESSED_PATHS.add(normalize_path(file_name))

    for path in paths:
        if path not in PROCESSED_PATHS:
            process_file(normalize_path(path))


def main(*args, **kwargs):
    """Generate the ``mkdocs_build`` copies of the Markdown documentation.

    Steps:
      1. Collect "README.md" plus every ".md" file found directly inside
         the scanned directories listed below.
      2. Run process_file() on each of them.
      3. Post-process each generated copy line by line: turn ``.md"`` link
         targets into ``/"``, expand the ``<!-- ... -->`` marker comments
         into their website-specific content, and drop "GitHub Only" lines.
         A file is rewritten (with CRLF line separators) only if at least
         one line was changed.

    ``*args`` and ``**kwargs`` are accepted but not used.
    """
    files_to_process = ["README.md"]
    scanned_dir_list = [
        "help_docs",
        "examples",
        "examples/behave_bdd",
        "examples/example_logs",
        "examples/presenter",
        "examples/chart_maker",
        "examples/tour_examples",
        "examples/visual_testing",
        "integrations/google_cloud",
        "seleniumbase/console_scripts",
    ]
    for scanned_dir in scanned_dir_list:
        for entry in os.listdir(ROOT_DIR / scanned_dir):
            files_to_process.append(os.path.join(scanned_dir, entry))

    # Template for an embedded YouTube player; the literal "yt_code" is
    # replaced with the actual video id.
    video_embed = (
        '<figure class="wp-block-embed wp-block-embed-youtube is-type-video '
        'is-provider-youtube"><div class="wp-block-embed__wrapper">'
        '<div class="epyt-video-wrapper fluid-width-video-wrapper" '
        'style="padding-top: 3px !important;"><iframe loading="lazy" '
        'id="_ytid_36718" data-origwidth="1200" data-origheight="675" '
        'src="https://www.youtube.com/embed/yt_code?enablejsapi=1&amp;'
        "origin=https://seleniumbase.io&amp;autoplay=0&amp;cc_load_policy=0"
        "&amp;cc_lang_pref=&amp;iv_load_policy=1&amp;loop=0&amp;"
        "modestbranding=1&amp;rel=0&amp;fs=1&amp;playsinline=0&amp;"
        'autohide=2&amp;theme=dark&amp;color=red&amp;controls=1&amp;" '
        'class="__youtube_prefs__ no-lazyload" title="YouTube player" '
        'allow="autoplay; encrypted-media" allowfullscreen="" '
        'data-no-lazy="1" data-skipgform_ajax_framebjll="">'
        "</iframe></div></div></figure>"
    )
    # Front matter emitted in place of a "<!-- SeleniumBase Docs -->" line.
    hide_toc_front_matter = ["---", "hide:", "  - toc", "---"]
    # Replacement for a "<!-- View on GitHub -->" line.
    view_on_github = (
        r'<p align="center"><div align="center">'
        r'<a href="https://github.com/seleniumbase/SeleniumBase">'
        r'<img src="https://img.shields.io/badge/'
        r"✅%20💛%20View%20Code-on%20GitHub%20🌎%20🚀"
        r'-02A79E.svg" alt="SeleniumBase on GitHub" />'
        r"</a></div></p>"
    )
    # Badge linking to the docs site, swapped for a badge linking to GitHub.
    alt_link_badge = (
        '<a href="https://seleniumbase.io">'
        '<img src="https://img.shields.io/badge/docs-seleniumbase.io'
        '-11BBAA.svg" alt="SeleniumBase Docs" /></a>'
    )
    back_to_gh = (
        r'<a href="https://github.com/seleniumbase/SeleniumBase">'
        r'<img src="https://img.shields.io/badge/'
        r"✅%20View%20Code-on%20GitHub%20🌎"
        r'-02A79E.svg" alt="SeleniumBase on GitHub" />'
        r"</a>"
    )
    # Replacement for a "<!-- SeleniumBase Header1 -->" line.
    header1 = (
        '<section align="center"><div align="center">'
        "<h2>✅ Reliable Browser Testing</h2>"
        "</div></section>"
    )

    updated_files_to_process = [
        file_ for file_ in files_to_process if file_.endswith(".md")
    ]

    for file_ in updated_files_to_process:
        process_file(file_)

    for file_ in updated_files_to_process:
        # Same location process_file() writes to, independent of the
        # current working directory.
        readme_file = ROOT_DIR / "mkdocs_build" / file_
        code_lines = readme_file.read_text(encoding="utf-8").split("\n")

        changed = False
        seleniumbase_lines = []
        for line in code_lines:
            if ' href="' in line and '.md"' in line:
                changed = True
                line = line.replace('.md"', '/"')
            if "<!-- SeleniumBase Docs -->" in line:
                # The marker line itself is dropped; only the front matter
                # is emitted. (A later branch for the same marker used to
                # exist but could never run because of this "continue".)
                changed = True
                seleniumbase_lines.extend(hide_toc_front_matter)
                continue
            if "<!-- View on GitHub -->" in line:
                changed = True
                line = view_on_github
            if alt_link_badge in line:
                # Mark the file as changed so that a file whose only edit
                # is this badge swap still gets written back.
                changed = True
                line = line.replace(alt_link_badge, back_to_gh)
            if "<!-- GitHub Only -->" in line:
                changed = True
                continue
            if "<!-- YouTube View -->" in line and "watch?v=" in line:
                # The video id runs from after "watch?v=" up to the next
                # double quote on the line.
                start_pt = line.find("watch?v=") + len("watch?v=")
                end_pt = line.find('"', start_pt + 1)
                yt_code = line[start_pt:end_pt]
                changed = True
                line = video_embed.replace("yt_code", yt_code)
            if "<!-- SeleniumBase Header1 -->" in line:
                changed = True
                line = header1
            seleniumbase_lines.append(line)
        if changed:
            # newline="" writes the explicit "\r\n" separators verbatim
            # instead of translating them a second time.
            with readme_file.open(
                "w", encoding="utf-8", newline=""
            ) as out_file:
                out_file.write("\r\n".join(seleniumbase_lines))
Update the docs 2021-10-12 05:00:03 +08:00			`""" For preparing the mkdocs-generated seleniumbase.io website. """`

Update the mkdocs prepare tool 2020-05-13 03:45:18 +08:00			`import codecs`
Add initial setup for mkdocs 2020-05-12 01:33:54 +08:00			`import os`
			`import re`
			`from pathlib import Path`

			`GITHUB_URL = r"https://github.com/seleniumbase/SeleniumBase/blob/master/"`
			`ROOT_DIR = Path(__file__).parents[1]`
Fixed flake8 2020-05-12 01:45:23 +08:00			`URL_PATTERN = re.compile(`
			`r"(?:\(\|<a href=\")(?P<url>{}[\w/.]+\.md)(?:\)\|\")".format(GITHUB_URL)`
			`)`
Add initial setup for mkdocs 2020-05-12 01:33:54 +08:00			`MD_PATH_PATTERN = re.compile(r"\[.*\]\((?P<path>[\w\\._/]+\.md)\)")`
			`HEADER_PATTERN = re.compile(`
			r"^(?P<level>#+)\s(<[\w\s=\":/.]+>)?\s\*(?P<header>.[\w`]):?\*\s$",
			`flags=re.MULTILINE,`
			`)`

			`PROCESSED_PATHS = set()`

Fixed flake8 2020-05-12 01:45:23 +08:00
Add initial setup for mkdocs 2020-05-12 01:33:54 +08:00			`def normalize_path(path):`
			`path = Path(path).absolute().relative_to(ROOT_DIR)`
			`return str(path).replace("\\", "/")`

Fixed flake8 2020-05-12 01:45:23 +08:00
Add initial setup for mkdocs 2020-05-12 01:33:54 +08:00			`def read_file(file_name):`
			`path = ROOT_DIR / file_name`
			`with path.open() as file_handle:`
			`content = file_handle.read()`
			`return content`


			`def process_file(file_name):`
			`content = read_file(file_name)`
			`urls = URL_PATTERN.findall(content)`
Update mkdocs files 2020-05-13 07:23:40 +08:00			`# content = content.replace("<br />", " \n")`
Fixed flake8 2020-05-12 01:45:23 +08:00			`content = re.sub(HEADER_PATTERN, r"\g<level> \g<header>", content)`
Add initial setup for mkdocs 2020-05-12 01:33:54 +08:00			`directory = "/".join(normalize_path(file_name).split("/")[:-1])`

			`paths = set()`

			`md_paths = MD_PATH_PATTERN.findall(content)`
			`for md_path in md_paths:`
			`path = md_path.lstrip("/")`
			`if (ROOT_DIR / directory / path).exists():`
Fixed flake8 2020-05-12 01:45:23 +08:00			`path = ROOT_DIR / directory / path`
Add initial setup for mkdocs 2020-05-12 01:33:54 +08:00			`else:`
Fixed flake8 2020-05-12 01:45:23 +08:00			`path = ROOT_DIR / path`
Add initial setup for mkdocs 2020-05-12 01:33:54 +08:00			`path = path.resolve().relative_to(ROOT_DIR)`
			`paths.add(normalize_path(path))`
			`content = content.replace("(" + md_path + ")", normalize_path(path))`

			`for url in urls:`
Perform code optimization 2021-05-09 02:10:45 +08:00			`path = url[len(GITHUB_URL) :] # noqa: E203`
Add initial setup for mkdocs 2020-05-12 01:33:54 +08:00			`paths.add(path)`
Fixed flake8 2020-05-12 01:45:23 +08:00			`content = content.replace(`
			`url, normalize_path(os.path.relpath(path, directory))`
			`)`
Add initial setup for mkdocs 2020-05-12 01:33:54 +08:00
Refactor the docs 2021-10-22 02:35:34 +08:00			`output_path = ROOT_DIR / "mkdocs_build" / file_name`
Add initial setup for mkdocs 2020-05-12 01:33:54 +08:00			`if not output_path.parent.is_dir():`
			`os.makedirs(output_path.parent)`

			`with output_path.open("w+") as output_file:`
			`output_file.write(content)`
			`PROCESSED_PATHS.add(normalize_path(file_name))`

			`for path in paths:`
			`if path not in PROCESSED_PATHS:`
			`process_file(normalize_path(path))`


			`def main(args, *kwargs):`
			`files_to_process = ["README.md"]`
Update the docs 2021-08-11 12:00:22 +08:00			`scanned_dir_list = []`
			`scanned_dir_list.append("help_docs")`
Refresh mkdocs files 2022-11-05 11:42:41 +08:00			`scanned_dir_list.append("examples")`
Update mkdocs 2022-11-06 12:50:23 +08:00			`scanned_dir_list.append("examples/behave_bdd")`
Update the docs 2021-08-11 12:00:22 +08:00			`scanned_dir_list.append("examples/example_logs")`
Update the docs 2021-08-12 10:25:29 +08:00			`scanned_dir_list.append("examples/presenter")`
			`scanned_dir_list.append("examples/chart_maker")`
			`scanned_dir_list.append("examples/tour_examples")`
Update the docs 2021-08-11 12:00:22 +08:00			`scanned_dir_list.append("examples/visual_testing")`
			`scanned_dir_list.append("integrations/google_cloud")`
Refresh mkdocs files 2022-11-05 11:42:41 +08:00			`scanned_dir_list.append("seleniumbase/console_scripts")`
Update the docs 2021-08-11 12:00:22 +08:00			`for scanned_dir in scanned_dir_list:`
			`for dir_ in os.listdir(ROOT_DIR / scanned_dir):`
			`files_to_process.append(os.path.join(scanned_dir, dir_))`

			`video_embed = (`
			`'<figure class="wp-block-embed wp-block-embed-youtube is-type-video '`
			`'is-provider-youtube"><div class="wp-block-embed__wrapper">'`
			`'<div class="epyt-video-wrapper fluid-width-video-wrapper" '`
			`'style="padding-top: 3px !important;"><iframe loading="lazy" '`
			`'id="_ytid_36718" data-origwidth="1200" data-origheight="675" '`
			`'src="https://www.youtube.com/embed/yt_code?enablejsapi=1&'`
Refactor and update the docs 2022-04-14 01:26:54 +08:00			`"origin=https://seleniumbase.io&autoplay=0&cc_load_policy=0"`
			`"&cc_lang_pref=&iv_load_policy=1&loop=0&"`
			`"modestbranding=1&rel=0&fs=1&playsinline=0&"`
Update the docs 2021-08-11 12:00:22 +08:00			`'autohide=2&theme=dark&color=red&controls=1&" '`
			`'class="__youtube_prefs__ no-lazyload" title="YouTube player" '`
			`'allow="autoplay; encrypted-media" allowfullscreen="" '`
			`'data-no-lazy="1" data-skipgform_ajax_framebjll="">'`
Refactor and update the docs 2022-04-14 01:26:54 +08:00			`"</iframe></div></div></figure>"`
Update the docs 2021-08-11 12:00:22 +08:00			`)`
Add initial setup for mkdocs 2020-05-12 01:33:54 +08:00
Update a mkdocs file 2020-05-12 06:43:56 +08:00			`updated_files_to_process = []`
Add initial setup for mkdocs 2020-05-12 01:33:54 +08:00			`for file_ in files_to_process:`
Update a mkdocs file 2020-05-12 06:43:56 +08:00			`if file_.endswith(".md"):`
			`updated_files_to_process.append(file_)`

			`for file_ in updated_files_to_process:`
Add initial setup for mkdocs 2020-05-12 01:33:54 +08:00			`process_file(file_)`
Update the mkdocs prepare tool 2020-05-13 03:45:18 +08:00
Update the docs 2021-08-11 12:00:22 +08:00			`for file_ in updated_files_to_process:`
Refactor the docs 2021-10-22 02:35:34 +08:00			`readme_file = "./mkdocs_build/" + file_`
Update the docs 2021-08-11 12:00:22 +08:00			`with open(readme_file, "r", encoding="utf-8") as f:`
			`all_code = f.read()`
			`code_lines = all_code.split("\n")`

			`changed = False`
			`seleniumbase_lines = []`
			`for line in code_lines:`
			`if ' href="' in line and '.md"' in line:`
			`changed = True`
			`line = line.replace('.md"', '/"')`
Update the docs 2022-11-05 13:27:22 +08:00			`if "<!-- SeleniumBase Docs -->" in line:`
Refresh mkdocs files 2022-11-05 11:42:41 +08:00			`changed = True`
			`new_lines = []`
			`new_lines.append("---")`
			`new_lines.append("hide:")`
			`new_lines.append(" - toc")`
			`new_lines.append("---")`
			`for line in new_lines:`
			`seleniumbase_lines.append(line)`
			`continue`
Update the docs 2021-08-11 12:00:22 +08:00			`if "<!-- View on GitHub -->" in line:`
			`changed = True`
			`line = (`
Update the documentation 2022-05-26 10:07:05 +08:00			`r'<p align="center"><div align="center">'`
Update the docs 2021-08-11 12:00:22 +08:00			`r'<a href="https://github.com/seleniumbase/SeleniumBase">'`
			`r'<img src="https://img.shields.io/badge/'`
			`r"✅%20💛%20View%20Code-on%20GitHub%20🌎%20🚀"`
			`r'-02A79E.svg" alt="SeleniumBase on GitHub" />'`
			`r"</a></div></p>"`
			`)`
Update the docs 2022-06-10 12:22:04 +08:00			`alt_link_badge = (`
			`'<a href="https://seleniumbase.io">'`
			`'<img src="https://img.shields.io/badge/docs-seleniumbase.io'`
			`'-11BBAA.svg" alt="SeleniumBase Docs" /></a>'`
			`)`
			`back_to_gh = (`
			`r'<a href="https://github.com/seleniumbase/SeleniumBase">'`
			`r'<img src="https://img.shields.io/badge/'`
Update the documentation 2022-07-13 21:17:04 +08:00			`r"✅%20View%20Code-on%20GitHub%20🌎"`
Update the docs 2022-06-10 12:22:04 +08:00			`r'-02A79E.svg" alt="SeleniumBase on GitHub" />'`
			`r"</a>"`
			`)`
			`if alt_link_badge in line:`
			`line = line.replace(alt_link_badge, back_to_gh)`
Update the docs 2021-08-11 12:00:22 +08:00			`if "<!-- GitHub Only -->" in line:`
			`changed = True`
			`continue`
Refactor and update the docs 2022-04-14 01:26:54 +08:00			`if "<!-- YouTube View -->" in line and "watch?v=" in line:`
Update the docs 2021-08-11 12:00:22 +08:00			`start_pt = line.find("watch?v=") + len("watch?v=")`
			`end_pt = line.find('"', start_pt + 1)`
			`yt_code = line[start_pt:end_pt]`
			`changed = True`
			`line = video_embed.replace("yt_code", yt_code)`
			`if "<!-- SeleniumBase Header1 -->" in line:`
			`changed = True`
			`line = (`
Update the documentation 2022-05-26 10:07:05 +08:00			`'<section align="center"><div align="center">'`
Update the docs 2021-08-11 12:00:22 +08:00			`"<h2>✅ Reliable Browser Testing</h2>"`
			`"</div></section>"`
			`)`
			`if "<!-- SeleniumBase Docs -->" in line:`
			`changed = True`
			`line = (`
Update the documentation 2022-11-27 04:37:54 +08:00			`'<h2><img '`
			`'src="https://seleniumbase.github.io/img/logo3b.png" '`
			`'title="SeleniumBase" width="24" /> SeleniumBase Docs '`
			`'<img '`
			`'src="https://seleniumbase.github.io/img/logo3b.png" '`
			`'title="SeleniumBase" width="24" /></h2>'`
Update the docs 2021-08-11 12:00:22 +08:00			`)`
			`seleniumbase_lines.append(line)`
			`if changed:`
			`out_file = codecs.open(readme_file, "w+", encoding="utf-8")`
			`out_file.writelines("\r\n".join(seleniumbase_lines))`
			`out_file.close()`