# Source code for mathmaker.lib.tools.xml

# -*- coding: utf-8 -*-

# Mathmaker creates automatically maths exercises sheets
# with their answers
# Copyright 2006-2017 Nicolas Hainaux <nh.techn@gmail.com>

# This file is part of Mathmaker.

# Mathmaker is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# any later version.

# Mathmaker is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with Mathmaker; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

import os
import errno
import subprocess
import copy
import logging
import random
import xml.etree.ElementTree as XML_PARSER

from mathmaker import settings
from mathmaker.lib.constants import BOOLEAN, DEFAULT_LAYOUT
from mathmaker.lib.tools import parse_layout_descriptor
from mathmaker.lib.tools.frameworks import _match_qid_sourcenb, parse_qid


# So far, quite useless features, so disabled on august 8th, 2017
# SWAPPABLE_QKINDS_QSUBKINDS = {("rectangle", "area"),
#                               ("rectangle", "perimeter"),
#                               ("square", "area"),
#                               ("square", "perimeter")}
#
# KINDS_SUBKINDS_CONTEXTS_TO_TRANSLATE = {
#     ('divi', 'direct', 'area_width_length_rectangle'):
#     ('rectangle', 'length_or_width', 'from_area')}


def get_xml_schema_path():
    """
    Returns the path to the xml schema used to validate the sheets.

    The path is built by appending 'sheet.xsd' to settings.frameworksdir.

    :rtype: str
    """
    return settings.frameworksdir + 'sheet.xsd'


def get_xml_sheets_paths():
    """
    Returns all paths to default xml frameworks.

    They are returned as a dictionary like:
    {id: path_to_matching_file.xml, ...}
    the id being the filename without its extension.

    If the frameworks directory (or one of its subdirectories) cannot be
    listed, it is simply considered empty.

    :rtype: dict
    """
    def _listing(path):
        # os.walk() yields nothing for a directory that cannot be listed;
        # fall back on an empty listing rather than letting StopIteration
        # escape from next().
        return next(os.walk(path), (path, [], []))

    # We assume all files are to be found as:
    # frameworks/theme_name/subtheme_name/filename.xml
    root = settings.frameworksdir
    sheets = {}
    for theme in _listing(root)[1]:
        theme_dir = root + theme
        for subtheme in _listing(theme_dir)[1]:
            subtheme_dir = theme_dir + '/' + subtheme
            for filename in _listing(subtheme_dir)[2]:
                sheet_id, extension = os.path.splitext(filename)
                if extension == '.xml':
                    sheets[sheet_id] = subtheme_dir + '/' + filename
    return sheets


def _read_layout(node, config, layout):
    """
    Reads a <layout> node and updates *config* and *layout* accordingly.

    The attributes of *node* are merged into *config*, as well as the
    'spacing_w' and 'spacing_a' values read from the children of *node*.
    Each child is handled as a wordings part (stored under layout['exc'])
    or as an answers part (stored under layout['ans']). The first part found
    for a key replaces the value already present in *layout*; the next ones
    extend it.

    :param node: The <layout> XML node.
    :param config: The configuration to update (modified in place).
    :type config: dict
    :param layout: The layout to update (modified in place).
    :type layout: dict
    :rtype: tuple
    :raises ValueError: if a print attribute cannot be turned into int, or
        if the number of colwidths does not match the number of columns of
        the rowxcol attribute.
    """
    config.update(node.attrib)
    # Keys of layout whose default value has already been replaced.
    overridden = set()

    def _store(key, items):
        # First write for a key replaces the default, later ones extend it.
        if key in overridden:
            layout[key] += items
        else:
            layout[key] = list(items)
            overridden.add(key)

    spacing = {'spacing_w': 'undefined', 'spacing_a': 'undefined'}
    for part in node:
        # part is either wordings or answers
        key = 'exc' if part.tag == 'wordings' else 'ans'
        s = part.attrib.get('spacing', 'undefined')
        jump = (s == 'jump to next page')
        if not jump:
            if part.tag == 'wordings':
                spacing['spacing_w'] = s
            if part.tag == 'answers':
                spacing['spacing_a'] = s
        rowxcol = part.attrib.get('rowxcol', 'none')
        distri = part.attrib.get('print', 'auto')
        if rowxcol == 'none':
            if distri == 'auto':
                distri = 'all'
            else:
                try:
                    distri = int(distri)
                except ValueError as err:
                    raise ValueError('XMLFileFormatError: a print '
                                     'attribute cannot be turned into int.'
                                     ) from err
            # A part that only asks for a page jump (no rowxcol, no print)
            # does not add any [None, distri] entry.
            if not (jump
                    and 'rowxcol' not in part.attrib
                    and 'print' not in part.attrib):
                _store(key, [None, distri])
        else:
            nrow, ncol = parse_layout_descriptor(rowxcol, sep=['×', 'x'],
                                                 special_row_chars=['?'])
            colwidths = part.attrib.get('colwidths', 'auto')
            if colwidths == 'auto':
                colwidths = [int(18 // ncol) for _ in range(ncol)]
            else:
                colwidths = [int(n) for n in colwidths.split(sep=' ')]
                if len(colwidths) != ncol:
                    raise ValueError(
                        'XMLFileFormatError: in a <layout>, the number of '
                        'columns '
                        'widths does not match the number of cols in '
                        'the rowxcol attribute.')
            _store(key, [[nrow, ] + colwidths])
            if distri == 'auto':
                distri = ' '.join(['1' for _ in range(ncol * nrow)])
            # Values may be separated by commas, semicolons or blanks.
            distri = distri.replace(',', ' ').replace(';', ' ')
            distri = tuple(int(n) for n in distri.split())
            layout[key].append(distri)
        if jump and part.tag in ('wordings', 'answers'):
            _store(key, ['jump', 'next_page'])
    config.update(spacing)
    return config, layout


def _get_layout_from(node, default_config=None):
    """
    Retrieves the configuration and layout from the first <layout> of *node*.

    If *node* has no <layout> child, the default configuration and a copy of
    DEFAULT_LAYOUT are returned.

    :param node: The XML node whose children are browsed.
    :param default_config: The configuration values to start from (updated
        in place if a <layout> child is found). None is the same as an empty
        dict.
    :type default_config: dict
    :rtype: tuple
    """
    # _read_layout() calls config.update(), so None must be turned into a
    # real dict before being handed over.
    config = {} if default_config is None else default_config
    layout = copy.deepcopy(DEFAULT_LAYOUT)

    for child in node:
        if child.tag == 'layout':
            return _read_layout(child, config, layout)

    return config, layout


def get_sheet_config(file_name):
    """
    Retrieves the sheet configuration values from *file_name*.

    The file is first validated against the xml schema with xmllint. The
    returned tuple contains, in this order: header, title, subtitle, text,
    answers_title, layout type, font size offset (as int), layout unit,
    sheet layout and preset.

    :param file_name: The XML file name.
    :type file_name: str
    :rtype: tuple
    :raises ValueError: if xmllint exits with a non-zero return code.
    """
    # Validation of the xml file
    # xmllint --noout --schema sheet.xsd file_name
    schema_path = get_xml_schema_path()
    # Opening the schema makes open() raise before xmllint is started if the
    # schema file cannot be read.
    with open(schema_path, 'r'):
        call_xmllint = subprocess.Popen([settings.xmllint,
                                         "--noout",
                                         "--schema",
                                         schema_path,
                                         file_name],
                                        stderr=subprocess.PIPE)
        # communicate() drains stderr while waiting; wait() alone may
        # deadlock once the pipe buffer is full.
        _, errors = call_xmllint.communicate()
        returncode = call_xmllint.returncode
        if returncode != 0:
            raise ValueError(
                '\nXMLFileFormatError: xmllint exited with a return code '
                'of ' + str(returncode) + '\n'
                'xmllint error message is:\n'
                '' + str(errors.decode(encoding='UTF-8')))

    xml_doc = XML_PARSER.parse(file_name).getroot()

    config, sheet_layout = \
        _get_layout_from(xml_doc, default_config={'type': 'default',
                                                  'unit': 'cm',
                                                  'font_size_offset': '0'})

    return (xml_doc.attrib["header"],
            xml_doc.attrib["title"],
            xml_doc.attrib["subtitle"],
            xml_doc.attrib["text"],
            xml_doc.attrib["answers_title"],
            config["type"],
            int(config["font_size_offset"]),
            config["unit"],
            sheet_layout,
            xml_doc.attrib.get('preset', 'default')
            )


def check_q_consistency(q_attrib, sources):
    """
    (Unfinished) Check the consistency of question's kind, subkind and source.

    Only the first source is checked, and only for
    intercept_theorem_triangle and intercept_theorem_triangle_formula
    questions.

    :param q_attrib: The attributes of the question (its 'id' is used).
    :type q_attrib: dict
    :param sources: The numbers' sources of the question.
    :type sources: list
    :raises ValueError: if the first source does not fit the question.
    """
    q_kind_subkind = '_'.join(parse_qid(q_attrib['id']))
    if (q_kind_subkind == 'intercept_theorem_triangle'
            and sources[0].startswith('ext_proportionality_quadruplet')):
        # The fourth '_'-separated field of the source is read as
        # '<mini>to<maxi>'.
        mini, maxi = sources[0].split(sep='_')[3].split(sep='to')
        if int(mini) < 11:
            raise ValueError('XMLFileFormatError: for intercept_theorem'
                             '_triangle questions, the minimum number '
                             'should be 11. Here it is only {}.'
                             .format(mini))
        span = int(maxi) - int(mini)
        if span < 19:
            raise ValueError('XMLFileFormatError: for intercept_theorem'
                             '_triangle questions, the range between '
                             'minimum and maximum should be at '
                             'least 19. Here it is only {}.'
                             .format(str(span)))
    if (q_kind_subkind == 'intercept_theorem_triangle_formula'
            and sources[0] != 'nothing'):
        raise ValueError('XMLFileFormatError: for intercept_theorem'
                         '_triangle_formula questions, the only possible '
                         'source is \'nothing\'. \'{}\' is not correct.'
                         .format(sources[0]))


def _questions_from_question_node(question_node):
    """
    Builds the questions described by one <question> node.

    One entry is created per child element of *question_node*.

    :param question_node: The XML <question> node.
    :rtype: list
    """
    # A former step swapping kind/subkind and translating contexts (see the
    # commented out SWAPPABLE_QKINDS_QSUBKINDS and
    # KINDS_SUBKINDS_CONTEXTS_TO_TRANSLATE at the top of this module) was
    # useless so far, hence disabled on august 8th, 2017. If this would be
    # to re-enable, take care attrib has no kind and subkind attributes
    # any more.
    entries = []
    for nb_node in question_node:
        attributes = copy.deepcopy(question_node.attrib)
        attributes.update(nb_node.attrib)
        sources = nb_node.attrib['source'].split(sep=';;')
        check_q_consistency(attributes, sources)
        entries.append([attributes, sources, int(nb_node.text)])
    return entries


def _questions_from_mix_node(mix_node):
    """
    Builds the questions described by one <mix> node.

    Questions and numbers' sources are shuffled and paired randomly.

    :param mix_node: The XML <mix> node.
    :rtype: list
    """
    picked_questions = []
    numbers_sources = []
    for item in mix_node:
        if item.tag == 'question':
            times = int(item.attrib.pop('pick', 1))
            picked_questions.extend(item.attrib for _ in range(times))
        elif item.tag == 'nb':
            # We don't check that 'source' is in the attributes, this
            # should have been checked by the xml schema, nor do we check
            # if the source tag is valid. This would be best done by the
            # xml schema (requires to use xsd1.1 but lxml validates only
            # xsd1.0). So far, it is done partially and later, in
            # lib/tools.py (the tag functions).
            # So far it's not possible to mix questions requiring several
            # sources with other questions, yet multiple sources questions
            # can be mixed together if they are the same type.
            numbers_sources.extend([[item.attrib['source']], item.attrib, 1]
                                   for _ in range(int(item.text)))
        else:
            raise ValueError(
                'XMLFileFormatError: unknown element found in '
                'the xml file: ' + item.tag)

    if len(numbers_sources) < len(picked_questions):
        raise ValueError(
            'XMLFileFormatError: incorrect mix section: the number '
            'of sources of numbers (' + str(len(numbers_sources)) + ') '
            'must be at least equal to the number of questions '
            '(' + str(len(picked_questions)) + ').')

    # So far, we only check if all of the numbers' sources may be
    # attributed to any of the questions, in order to just distribute
    # them all randomly.
    for nb in numbers_sources:
        for question in picked_questions:
            variant = nb[1].get('variant', question.get('variant', ''))
            if _match_qid_sourcenb(question['id'].replace(' ', '_'),
                                   nb[0], variant):
                continue
            raise ValueError(
                'XMLFileFormatError: this source: '
                + str(nb[0]) + ' cannot '
                'be attributed to this question:'
                ' ' + str(question['id'].replace(' ', '_')))

    def _is_required(nb):
        return BOOLEAN[nb[1].get('required', 'false')]()

    random.shuffle(picked_questions)
    if any(_is_required(nb) for nb in numbers_sources):
        # Required sources go first, so that zip() below cannot drop them.
        required = [nb for nb in numbers_sources if _is_required(nb)]
        optional = [nb for nb in numbers_sources if not _is_required(nb)]
        random.shuffle(required)
        random.shuffle(optional)
        numbers_sources = required + optional
    else:
        random.shuffle(numbers_sources)

    mixed = []
    for question, nb in zip(picked_questions, numbers_sources):
        merged = copy.deepcopy(question)
        merged.update(nb[1])
        mixed.append([merged, nb[0], 1])

    random.shuffle(mixed)
    return mixed


def _get_q_list_from(exercise_node):
    """
    Retrieves the questions from one exercise section.

    Only the <question> and <mix> children of the exercise are used.

    :param exercise_node: The XML node of the exercise.
    :type exercise_node:
    :rtype: list
    """
    # For instance we will get a list of this kind of elements:
    # [{'id': 'multi direct', 'nb': 'int'}, ['table_2_9'], 4]
    # [{'id': 'expand_and_reduce double_expansion'},
    #  ['table_2_9'],
    #  4]
    questions = []
    for child in exercise_node:
        if child.tag == 'question':
            questions.extend(_questions_from_question_node(child))
        elif child.tag == 'mix':
            questions.extend(_questions_from_mix_node(child))
    return questions


def get_exercises_list(file_name):
    """
    Retrieves the exercises' list from *file_name*.

    Each <exercise> child of the root node gives one tuple made of the
    exercise's questions, its (config, layout) couple and its attributes.

    :param file_name: The XML file name.
    :type file_name: str
    :rtype: list
    :raises FileNotFoundError: if *file_name* cannot be found.
    """
    mainlogger = logging.getLogger("__main__")
    try:
        xml_doc = XML_PARSER.parse(file_name).getroot()
    except FileNotFoundError as err:
        # %-style arguments: works for any kind of file_name (a plain
        # concatenation would fail on non str values).
        mainlogger.error('FileNotFoundError: %s', file_name)
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                                str(file_name)) from err
    return [(_get_q_list_from(child),
             _get_layout_from(child, default_config={}),
             child.attrib, )
            for child in xml_doc
            if child.tag == 'exercise']