"""Rendering of formulas and Content MathML.
A common problem in rendering MathML is that the content MathML is difficult to read.
The presentation MathML has a much better rendering and improves understandability.
This module uses stylesheets for the conversion of content MathML -> presentation
MathML.
see also: https://docs.sympy.org/dev/modules/printing.html#module-sympy.printing.mathml
"""
import re
from functools import lru_cache
from typing import Optional, Set
import libsbml
import lxml.etree as ET
from sbmlutils import RESOURCES_DIR, log
# Module-level logger.
logger = log.get_logger(__name__)

# XSLT stylesheets located under RESOURCES_DIR / "xslt"; parsed once at import.
_xslt_dir = RESOURCES_DIR / "xslt"
# Stylesheet used for content MathML -> presentation MathML.
xslt_cmml2pmml = ET.parse(str(_xslt_dir / "ctopff.xsl"))
# Stylesheet used for presentation MathML -> LaTeX.
xslt_pmml2tex = ET.parse(str(_xslt_dir / "xsltml" / "mmltex.xsl"))
def cmathml_to_astnode(cmathml: str) -> libsbml.ASTNode:
    """Parse a Content MathML string into a libsbml ASTNode.

    Delegates to ``libsbml.readMathMLFromString``.

    :param cmathml: SBML Content MathML string
    :return: result of ``libsbml.readMathMLFromString`` for the given string
    """
    astnode: libsbml.ASTNode = libsbml.readMathMLFromString(cmathml)
    return astnode
def astnode_to_latex(astnode: libsbml.ASTNode) -> str:
    """Render an ASTNode as LaTeX.

    The node is serialized to Content MathML with libsbml, the XML
    declaration is stripped, and the remaining markup is converted by
    ``cmathml_to_latex``.

    :param astnode: libsbml.ASTNode to render
    :return: LaTeX string
    """
    xml_declaration = '<?xml version="1.0" encoding="UTF-8"?>'
    mathml: str = libsbml.writeMathMLToString(astnode)
    return cmathml_to_latex(mathml.replace(xml_declaration, ""))
@lru_cache(maxsize=10000)
def cmathml_to_latex(cmml_str: str) -> str:
    """Convert a Content MathML string to LaTeX using XSLT transformations.

    The content MathML is transformed to presentation MathML, which is then
    transformed to LaTeX. The resulting LaTeX is post-processed with a fixed,
    ordered list of literal string replacements and finally passed through
    ``_fix_mathit_symbols``. Results are memoized per input string.

    :param cmml_str: Content MathML string
    :return: LaTeX string
    """
    # content MathML -> presentation MathML
    content_dom = ET.fromstring(cmml_str)
    to_presentation = ET.XSLT(xslt_cmml2pmml)
    presentation_dom = to_presentation(content_dom)

    # presentation MathML -> latex
    to_latex = ET.XSLT(xslt_pmml2tex)
    latex = str(to_latex(presentation_dom))

    # Literal replacements; applied strictly in this order because later
    # entries operate on the output of earlier ones.
    ordered_replacements = (
        # remove equation symbols
        ("$", ""),
        # fix piecewise
        (r"\hfill", ""),
        (r"\multicolumn{2}{c}", ""),
        (r"\left(\{\begin{array}{ccc}", r"\begin{cases} "),
        (r"\end{array}\right)", r"\end{cases}"),
        (r"\{\begin{array}{ccc}", r"\begin{cases} "),
        (r"\end{array}", r"\end{cases}"),
        # fix lambda function
        (r"}\mathit", r"}, \mathit"),
        (r"\lambda ", r"\lambda("),
        (r"}.", "}) ="),
    )
    for old, new in ordered_replacements:
        latex = latex.replace(old, new)

    # cleanup symbols
    return _fix_mathit_symbols(latex)
# Symbol names that are replaced by the corresponding LaTeX command when they
# appear as a complete \mathit{...} identifier.
# NOTE: every entry needs its own trailing comma; adjacent string literals are
# silently concatenated by Python (e.g. "Gamma" "delta" -> "Gammadelta").
greek_symbols = [
    "alpha",
    "beta",
    "gamma",
    "Gamma",
    "delta",
    "Delta",
    "epsilon",
    "zeta",
    "eta",
    "theta",
    "iota",
    "kappa",
    "Lambda",  # no lowercase due to function definition
    "mu",
    "nu",
    "omicron",
    "pi",
    "rho",
    "sigma",
    "tau",
    "upsilon",
    "Upsilon",
    "phi",
    "Phi",
    "chi",
    "psi",
    "Psi",
    "omega",
    "Omega",
]
def symbol_to_latex(symbol: str) -> str:
    """Convert a symbol name to LaTeX.

    Underscores are escaped, the result is wrapped in ``\\mathit{...}`` and
    then cleaned up by ``_fix_mathit_symbols``.

    :param symbol: symbol name
    :return: LaTeX string for the symbol
    """
    escaped = symbol.replace(r"_", r"\_")
    wrapped = r"\mathit{" + escaped + "}"
    return _fix_mathit_symbols(wrapped)
def _fix_mathit_symbols(tex_str: str) -> str:
    """Apply heuristic replacements for better LaTeX rendering.

    Two rewrites are performed on ``\\mathit{...}`` identifiers:

    * An identifier consisting of two alphanumeric parts joined by a single
      escaped underscore is rendered with the second part as subscript,
      e.g. ``\\mathit{group1\\_group2}`` -> ``\\mathit{group1_{group2}}``.
      Identifiers with more than one underscore are left untouched.
    * An identifier that is exactly one of the names in ``greek_symbols`` is
      replaced by the corresponding LaTeX command, e.g. ``\\mathit{alpha}``
      -> ``\\mathit{\\alpha}`` (lowercase lambda is not in the list).

    :param tex_str: LaTeX string
    :return: LaTeX string with the replacements applied
    """
    # fix single underscores in variable names
    # \mathit{group1\_group2} -> \mathit{group1_{group2}}
    tex_str = re.sub(
        r"\\mathit\{([a-zA-Z0-9]+)\\_([a-zA-Z0-9]+)\}",
        r"\\mathit{\1_{\2}}",
        tex_str,
    )

    # replace greek symbols; the backslash is concatenated explicitly instead
    # of relying on the invalid escape sequence "\{" inside an f-string
    for symbol in greek_symbols:
        tex_str = tex_str.replace(
            r"\mathit{" + symbol + "}",
            r"\mathit{" + "\\" + symbol + "}",
        )
    return tex_str
def _get_variables(
    astnode: libsbml.ASTNode, variables: Optional[Set[str]] = None
) -> Set[str]:
    """Collect the names of all leaf name-nodes below an ASTNode.

    The tree is traversed recursively. Only nodes without children for which
    ``isName()`` is true contribute their name.

    :param astnode: root node of the (sub)tree to inspect
    :param variables: optional set to add the names to; it is modified in
        place. A new set is created if omitted.
    :return: the set of collected names (the passed set if one was given)
    """
    collected: Set[str] = set() if variables is None else variables

    child_count = astnode.getNumChildren()
    if child_count != 0:
        # inner node: descend into every child, sharing the same result set
        for index in range(child_count):
            _get_variables(astnode.getChild(index), variables=collected)
    elif astnode.isName():
        # leaf node referring to a name
        collected.add(astnode.getName())

    return collected