369 lines
10 KiB
Python
Executable File
369 lines
10 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
#
|
|
# import_google_fonts
|
|
#
|
|
# Using Python rather than Ruby for the Protocol Buffer parser.
|
|
# https://github.com/protocolbuffers/protobuf/issues/6508#issuecomment-522165498
|
|
#
|
|
# To install dependencies:
|
|
#
|
|
# pip3 install gftools html2text jinja2 protobuf
|
|
|
|
from functools import reduce
|
|
from glob import glob
|
|
import os
|
|
import re
|
|
import sys
|
|
from google.protobuf import text_format
|
|
import gftools.fonts_public_pb2 as fonts_pb2
|
|
import jinja2
|
|
import html2text
|
|
import urllib.request
|
|
|
|
def parse_metadata(filename):
    """Read a text-format protobuf file into a ``FamilyProto`` message.

    Based off of
    https://github.com/googlefonts/gftools/blob/2bfd4acd402b353aaeb46b991e6cad855001e4c8/Lib/gftools/util/google_fonts.py

    Args:
        filename: Path of the file to read.

    Returns:
        A ``fonts_pb2.FamilyProto`` with the file's contents merged into it.
    """
    family = fonts_pb2.FamilyProto()
    with open(filename) as handle:
        raw_text = handle.read()
    text_format.Merge(raw_text, family)
    return family
|
|
|
|
|
|
# FontCask bundles a font family's folder, metadata and description and
# renders them into the text of a cask file through TEMPLATE below.
class FontCask:
|
|
# Shared jinja2 environment for TEMPLATE. It is configured with
# keep_trailing_newline, trim_blocks and jinja2.StrictUndefined.
ENVIRONMENT = jinja2.Environment(
|
|
keep_trailing_newline=True, trim_blocks=True, undefined=jinja2.StrictUndefined,
|
|
)
|
|
# Cask template. Variables used: token, files, folder, homepage, font_name, desc.
# - exactly one file: `url` is the raw GitHub URL of that file;
# - otherwise: `url` is the google/fonts git repository with `branch` and `only_path`.
# In both cases `verified:` is emitted only when 'github.com/' is not in homepage.
# The `desc` stanza is emitted only when desc is truthy; one `font` stanza per file.
TEMPLATE = ENVIRONMENT.from_string(
|
|
"""cask "{{token}}" do
|
|
version :latest
|
|
sha256 :no_check
|
|
|
|
{% if files|length == 1 %}
|
|
url "https://github.com/google/fonts/raw/main/{{folder}}/{{files[0] | urlencode}}"
|
|
{%- if not 'github.com/' in homepage %},
|
|
verified: "github.com/google/fonts/"
|
|
{%- endif +%}
|
|
{% else %}
|
|
url "https://github.com/google/fonts.git",
|
|
{%- if not 'github.com/' in homepage +%}
|
|
verified: "github.com/google/fonts",
|
|
{%- endif +%}
|
|
branch: "main",
|
|
only_path: "{{folder}}"
|
|
{% endif %}
|
|
name "{{font_name}}"
|
|
{% if desc %}
|
|
desc "{{desc}}"
|
|
{% endif %}
|
|
homepage "{{homepage}}"
|
|
|
|
{% for file in files %}
|
|
font "{{file}}"
|
|
{% endfor %}
|
|
|
|
# No zap stanza required
|
|
end
|
|
"""
|
|
)
|
|
|
|
def __init__(self, folder, meta, description=None, early_access=False):
    """Store the inputs used to render a cask.

    Args:
        folder: Stored as ``self.folder``; passed to the template as ``folder``.
        meta: Metadata object; other methods read its ``name`` and ``fonts``.
        description: Optional description text, stored as ``self.desc``.
        early_access: When true, ``homepage()`` returns the early-access URL.
    """
    # No override until a caller assigns one; homepage() checks this first.
    self.homepage_override = None
    self.early_access = early_access
    self.desc = description
    self.meta = meta
    self.folder = folder
|
|
|
|
def font_name(self):
    """Return the family name taken from the metadata (``self.meta.name``)."""
    family = self.meta
    return family.name
|
|
|
|
def description(self):
    """Return the stored description, or ``None`` when there is none.

    Any falsy ``self.desc`` (``None`` or the empty string) yields ``None``.
    The former separate ``len(self.desc) == 0`` check could never be reached
    after the falsiness test, so a single expression covers both cases.
    """
    return self.desc or None
|
|
|
|
def token(self):
    """Return the cask token: ``font-`` plus the lower-cased, hyphenated font name."""
    # https://github.com/Homebrew/homebrew-cask-fonts/blob/master/CONTRIBUTING.md#converting-the-canonical-name-to-a-token
    words = self.font_name().lower().split(" ")
    return "font-" + "-".join(words)
|
|
|
|
def token_sharding_dir(self):
    """Return the shard directory name for this cask.

    The result is ``font-`` followed by the first character of the token
    with its leading ``font-`` removed, e.g. ``font-open-sans`` -> ``font-o``.

    Raises:
        IndexError: If nothing follows the ``font-`` prefix.
    """
    # Strip only the leading prefix. Splitting on every "font-" produced an
    # empty segment, and an IndexError, when the name part itself starts with
    # "font-" (for example "font-font-awesome").
    name_part = self.token().removeprefix("font-")
    return "font-" + name_part[0]
|
|
|
|
def dest_path(self):
    """Return the relative path of the cask file: ``Casks/font/<shard>/<token>.rb``."""
    shard = self.token_sharding_dir()
    cask_file = self.token() + ".rb"
    return os.path.join("Casks", "font", shard, cask_file)
|
|
|
|
def name_path(self):
    """Return the font name with every space replaced by ``+``."""
    return "+".join(self.font_name().split(" "))
|
|
|
|
def homepage(self):
    """Return the homepage URL for the cask.

    Precedence: an explicit ``homepage_override`` wins; otherwise early-access
    fonts get the early-access page; everything else gets the specimen page
    built from ``name_path()``.
    """
    override = self.homepage_override
    if override is not None:
        return override

    if not self.early_access:
        return "https://fonts.google.com/specimen/" + self.name_path()

    return "https://fonts.google.com/earlyaccess"
|
|
|
|
def files(self):
    """Return the ``filename`` of every font in the metadata, sorted ascending."""
    return sorted(entry.filename for entry in self.meta.fonts)
|
|
|
|
def cask_content(self):
    """Render ``TEMPLATE`` with this cask's values and return the result."""
    context = {
        "token": self.token(),
        "folder": self.folder,
        "font_name": self.font_name(),
        "desc": self.description(),
        "homepage": self.homepage(),
        "files": self.files(),
    }
    return self.TEMPLATE.render(**context)
|
|
|
|
|
|
def is_other_foundry(cask_path):
    """Return True when the cask file has no ``url`` pointing at github.com/google/fonts.

    Args:
        cask_path: Path of the cask file to inspect.
    """
    with open(cask_path) as cask_file:
        google_url = re.search(r"url ['\"]https://github.com/google/fonts", cask_file.read())

    return google_url is None
|
|
|
|
|
|
def should_skip(cask_path):
    """Return True when an existing cask at *cask_path* must not be overwritten.

    That is the case only when the file exists and ``is_other_foundry`` says it
    does not come from the Google Fonts repository; a notice is printed then.
    """
    if not os.path.exists(cask_path):
        return False

    # Cask already exists
    if not is_other_foundry(cask_path):
        return False

    print("Other foundry:", cask_path)
    # don't overwrite it, per
    # https://github.com/Homebrew/homebrew-cask-fonts/blob/master/CONTRIBUTING.md#google-web-font-directory
    return True
|
|
|
|
|
|
def _extract_description(font_name, text):
    """Return a short description found in *text*, or ``None`` if none matches.

    Looks for "<font_name> ... is [a|an|the] " and keeps what follows up to
    the first period, passed through ``str.capitalize()``.
    """
    # The article alternatives are grouped together behind one leading \s+.
    # Previously "the" had no leading whitespace in its alternative, so it
    # could never match after "is" and stayed at the front of the result.
    pattern = r".*" + re.escape(font_name) + r"\s+(?:.*\s+)?is(?:\s+(?:an?|the))?\s+"
    parts = re.split(pattern, text, maxsplit=1)

    if len(parts) < 2:
        return None

    return parts[1].split(".")[0].capitalize()


def metadata_to_cask(meta_file, repo_dir):
    """Build a ``FontCask`` from a metadata file and its sibling description.

    Args:
        meta_file: Path of the metadata file, parsed with ``parse_metadata``.
        repo_dir: Repository root; the cask's folder is the metadata file's
            directory relative to it.

    Returns:
        A ``FontCask`` whose description is derived from
        ``DESCRIPTION.en_us.html`` next to *meta_file*, or ``None`` when that
        file is absent or no description can be extracted from it.
    """
    folder = os.path.dirname(os.path.relpath(meta_file, start=repo_dir))
    meta = parse_metadata(meta_file)

    description_path = os.path.join(os.path.dirname(meta_file), "DESCRIPTION.en_us.html")

    description = None

    if os.path.exists(description_path):
        h2t = html2text.HTML2Text()
        h2t.ignore_links = True
        h2t.ignore_images = True
        h2t.ignore_tables = True
        h2t.ignore_emphasis = True

        with open(description_path) as f:
            html = f.read()

        # Drop double quotes and collapse all whitespace runs to single spaces.
        contents = " ".join(h2t.handle(html).replace('"', '').split())
        description = _extract_description(meta.name, contents)

    return FontCask(folder, meta, description=description)
|
|
|
|
|
|
def write_cask(cask):
    """Write the rendered cask to its destination file.

    Args:
        cask: Object providing ``dest_path()`` and ``cask_content()``.

    Returns:
        True when the file was written; False when ``should_skip`` rejected
        the path or the file already holds exactly the rendered content.
    """
    target = cask.dest_path()
    if should_skip(target):
        return False

    rendered = cask.cask_content()

    if os.path.exists(target):
        with open(target, "r") as existing:
            unchanged = existing.read() == rendered
        if unchanged:
            return False

    # Create the parent directory on first use.
    parent = os.path.dirname(target)
    if not os.path.exists(parent):
        os.makedirs(parent)

    with open(target, "w") as out:
        out.write(rendered)

    return True
|
|
|
|
|
|
def find_google_casks():
    """Collect the existing casks that reference Google Fonts.

    Scans ``Casks/font/**/*.rb`` relative to the current directory.

    Returns:
        A dict mapping each cask token (file name without extension) to a dict
        with keys ``'path'``, ``'description'`` and ``'homepage'``; the latter
        two are ``None`` when the corresponding stanza is not found.
    """
    found = {}

    for cask_path in glob('Casks/font/**/*.rb'):
        file_name = os.path.basename(cask_path)
        token = os.path.splitext(file_name)[0]

        with open(cask_path, "r") as cask_file:
            contents = cask_file.read()

        # Skip "font-material-symbols" as it matches the url regex, but is not included in the Google Fonts repo
        if file_name == "font-material-symbols.rb":
            continue

        if re.search(r"(github\.com\/google\/fonts|fonts\.google\.com|google\.com/fonts)", contents) is None:
            continue

        homepage_match = re.search(r"homepage\s+([\"'])(.*)(\1)\s*", contents)
        desc_match = re.search(r"desc\s+([\"'])(.*)(\1)\s*", contents)

        found[token] = {
            'path': cask_path,
            'description': desc_match[2] if desc_match else None,
            'homepage': homepage_match[2] if homepage_match else None,
        }

    return found
|
|
|
|
def find_family_folders(repo_dir):
    """Return every entry directly under the ``apache``, ``ofl`` and ``ufl`` sub-directories.

    Args:
        repo_dir: Root directory containing those sub-directories.

    Returns:
        One flat list of paths, grouped in the order apache, ofl, ufl.
    """
    license_dirs = ("apache", "ofl", "ufl")
    family_folders = []
    for license_dir in license_dirs:
        family_folders += glob(os.path.join(repo_dir, license_dir, "*"))
    return family_folders
|
|
|
|
|
|
# https://www.geeksforgeeks.org/python-split-camelcase-string-to-individual-strings/
def camel_case_split(str):
    """Split a CamelCase string into its words.

    A word is an upper-case letter followed by lower-case letters, or a run
    of upper-case letters that stops before the next capitalised word.
    Characters outside A-Z / a-z are not part of any word and are dropped.

    Args:
        str: The text to split (the name shadows the builtin but is kept
            because it is part of the signature).

    Returns:
        The list of words in order of appearance.
    """
    word = re.compile(r"[A-Z](?:[a-z]+|[A-Z]*(?=[A-Z]|$))")
    return [found.group(0) for found in word.finditer(str)]
|
|
|
|
|
|
def derive_name(font_file):
    """Derive a space-separated family name from a font file path.

    The file's stem is cut to the length of its parent directory's name, a
    trailing ``-<word>`` suffix is removed, and the rest is split on
    CamelCase boundaries.

    Args:
        font_file: Path of a font file inside its family folder.

    Returns:
        The CamelCase words joined by single spaces.
    """
    folder_name = os.path.basename(os.path.dirname(font_file))

    stem, _extension = os.path.splitext(os.path.basename(font_file))
    prefix = stem[: len(folder_name)]
    family_part = re.sub(r"-\w+$", "", prefix)

    return " ".join(camel_case_split(family_part))
|
|
|
|
|
|
def derive_metadata(family_folder):
    """Create a metadata object based on the contents of the folder.

    Args:
        family_folder: Directory searched for ``*.ttf`` files.

    Returns:
        A ``fonts_pb2.FamilyProto`` whose name comes from ``derive_name`` and
        whose ``fonts`` list has one entry per ``.ttf`` file found.

    Raises:
        IndexError: If the folder contains no ``.ttf`` file.
    """
    ttf_paths = glob(os.path.join(family_folder, "*.ttf"))

    family = fonts_pb2.FamilyProto()
    # grab the first font, arbitrarily
    family.name = derive_name(ttf_paths[0])

    font_entries = []
    for ttf_path in ttf_paths:
        font_entries.append(fonts_pb2.FontProto(filename=os.path.basename(ttf_path)))
    family.fonts.extend(font_entries)

    return family
|
|
|
|
|
|
def derive_cask(family_folder, repo_dir):
    """Build a ``FontCask`` for a family folder from derived metadata.

    Args:
        family_folder: Directory of the font family.
        repo_dir: Repository root; the cask's folder is *family_folder*
            relative to it.

    Returns:
        A ``FontCask`` flagged as early access when the folder contains an
        ``EARLY_ACCESS.category`` file.
    """
    derived_meta = derive_metadata(family_folder)
    relative_folder = os.path.relpath(family_folder, start=repo_dir)

    marker = os.path.join(family_folder, "EARLY_ACCESS.category")
    is_early_access = os.path.exists(marker)

    return FontCask(relative_folder, derived_meta, early_access=is_early_access)
|
|
|
|
|
|
def _homepage_not_found(url):
    """Return True when requesting *url* is answered with HTTP 404.

    Other HTTP error statuses return False. Failures that carry no HTTP
    status (urllib's plain ``URLError``) are not caught and propagate.
    """
    from urllib.error import HTTPError

    try:
        # Only the status matters; close the response immediately.
        with urllib.request.urlopen(url):
            return False
    except HTTPError as error:
        # ``code`` exists on HTTPError only; reading it from a plain URLError
        # used to raise AttributeError.
        return error.code == 404


def run():
    """Command-line entry point: sync casks with a google/fonts checkout.

    Expects exactly two arguments in ``sys.argv``: the repository path and a
    mode. Mode ``add`` writes casks for new families, ``update`` rewrites
    existing ones, ``delete`` removes casks whose family was not found; an
    empty mode string performs all three. Exits with status 1 and a usage
    message when the argument count is wrong.
    """
    if len(sys.argv) != 3:
        print(
            """Usage: ./import_google_fonts <path-to-repo> <mode>

Download the or clone the repository from https://github.com/google/fonts, then provide the path to the script.
"""
        )
        sys.exit(1)

    repo_dir = sys.argv[1]
    mode = sys.argv[2]
    family_folders = find_family_folders(repo_dir)

    existing_casks = find_google_casks()
    added_casks = {}
    updated_casks = {}

    for family_folder in family_folders:
        meta_file = os.path.join(family_folder, "METADATA.pb")
        # check if the metadata file is present
        # https://github.com/google/fonts/issues/2512
        if os.path.exists(meta_file):
            try:
                cask = metadata_to_cask(meta_file, repo_dir)
            except text_format.ParseError:
                continue
        else:
            cask = derive_cask(family_folder, repo_dir)

        token = cask.token()

        # Ek Mukta has been renamed to just Mukta but still exists.
        if token == 'font-ek-mukta':
            continue

        # Skip cask if already handled (i.e. if it exists in multiple license sub-directories).
        if token in added_casks or token in updated_casks:
            continue

        # Popping leaves only casks without a matching family in existing_casks.
        existing_cask = existing_casks.pop(token, None)
        if existing_cask:
            cask.desc = existing_cask['description']

            # If font is unreleased, re-use previous homepage URL.
            if cask.homepage() != existing_cask['homepage']:
                if _homepage_not_found(cask.homepage()):
                    cask.homepage_override = existing_cask['homepage']

            updated_casks[token] = cask
        else:
            if _homepage_not_found(cask.homepage()):
                cask.homepage_override = cask.meta.source.repository_url

            added_casks[token] = cask

    changed_casks = {}

    print("Mode " + mode)

    if not mode or mode == 'add':
        print("Adding added casks")

        changed_casks |= added_casks

    if not mode or mode == 'update':
        print("Adding updated casks")
        changed_casks |= updated_casks

    written_casks = 0
    for cask in changed_casks.values():
        if write_cask(cask):
            written_casks += 1

        # Limit cask changes per PR. (mode is always a string here, so the
        # former "mode is not None" guard was always true.)
        if written_casks >= 50:
            break

    if not mode or mode == 'delete':
        # Delete casks which don't exist anymore.
        for deleted_cask in existing_casks.values():
            os.remove(deleted_cask["path"])
|
|
# Only start the import when executed as a script, so the module can be
# imported (e.g. for testing) without side effects.
if __name__ == "__main__":
    run()
|