Source code for nbgrader_jupyterquiz.grader.parse

"""Parse quiz question source from notebook cell markdown."""

import copy
import dataclasses
from typing import Any

import jsonschema.exceptions

from nbgrader_jupyterquiz.grader import validate


class ParseError(Exception):
    """Signal that quiz source text is malformed and cannot be parsed."""
@dataclasses.dataclass
class Quiz:
    """
    One parsed quiz region.

    Fatal problems are reported by raising :class:`ParseError` during
    parsing; anything milder (for example an ``MC`` question with zero or
    one correct answers) is recorded in ``warnings`` so the caller can
    decide how to surface it.
    """

    # Quiz-level options parsed from the header line.
    options: dict[str, Any]
    # One dict per question, in source order.
    questions: list[dict[str, Any]]
    # Non-fatal parse-time messages; a fresh list per instance.
    warnings: list[str] = dataclasses.field(default_factory=list)
def parse_cell(
    source: str,
    begin_quiz_delimiter: str = "#### Quiz",
    end_quiz_delimiter: str = "#### End Quiz",
) -> tuple[list[Quiz], list[str]]:
    """
    Turn every quiz region of a markdown cell into a :class:`Quiz`.

    Parameters
    ----------
    source : str
        Complete source text of the markdown cell.
    begin_quiz_delimiter : str, optional
        Marker that starts a quiz region.
    end_quiz_delimiter : str, optional
        Marker that ends a quiz region.

    Returns
    -------
    quizzes : list[Quiz]
        One :class:`Quiz` per region, in source order. Non-fatal
        parse-time messages are stored on each quiz's ``warnings``.
    cell_contents : list[str]
        The cell's lines with all quiz regions removed.

    Raises
    ------
    ParseError
        If a quiz region yields no questions (or a nested parser
        rejects the source).
    jsonschema.exceptions.ValidationError
        If a parsed question fails schema validation.
    """
    regions, cell_contents = find_quiz_regions(source, begin_quiz_delimiter, end_quiz_delimiter)
    choice_types = ("multiple_choice", "many_choice")
    quizzes: list[Quiz] = []

    for header, region_lines in regions:
        options = parse_quiz_options(header)
        parsed: list[dict[str, Any]] = []
        notes: list[str] = []

        for block in split_questions(region_lines):
            question = parse_question(block)

            # Push the quiz-level hide_correctness option down into the
            # question: choice questions carry ``hide`` on each answer,
            # numeric questions carry it on the question itself.
            # ``setdefault`` keeps any value already present.
            if options.get("hide_correctness"):
                kind = question["type"]
                if kind in choice_types:
                    for answer in question["answers"]:
                        answer.setdefault("hide", True)
                elif kind == "numeric":
                    question.setdefault("hide", True)

            # Schema failures are deliberately passed through untouched.
            try:
                validate.validate_question(question)
            except jsonschema.exceptions.ValidationError:
                raise

            note = _check_choice_cardinality(question)
            if note:
                notes.append(note)
            parsed.append(question)

        if not parsed:
            raise ParseError("Quiz region without any parsable questions found.")

        # As soon as one question declares explicit points, give every
        # other question the default of 1 so the quiz is weighted
        # consistently; a quiz without any explicit points is left
        # unweighted (no ``points`` key at all).
        if any("points" in entry for entry in parsed):
            for entry in parsed:
                entry.setdefault("points", 1)

        quizzes.append(Quiz(options, parsed, notes))

    return quizzes, cell_contents
def _check_choice_cardinality(question: dict[str, Any]) -> str | None: """ Enforce SC/MC correct-answer counts declared by the instructor. Single-choice (``SC`` → ``multiple_choice``) must have exactly one correct answer — raises :class:`ParseError` otherwise. Many-choice (``MC`` → ``many_choice``) may have any count, but 0 or 1 correct answers return a warning string since the instructor likely meant ``SC`` (for exactly 1) or a numeric/string question (for 0). Parameters ---------- question : dict Parsed question dict (already schema-validated). Non-choice types are silently ignored. Returns ------- str or None A warning message for the caller to surface, or ``None`` when the question is fine (or unhandled). """ if question.get("type") not in ("multiple_choice", "many_choice"): return None n_correct = sum(1 for a in question.get("answers", []) if a.get("correct")) qtext = question.get("question", "") if question["type"] == "multiple_choice" and n_correct != 1: raise ParseError( f"Single-choice (SC) question must have exactly one correct answer, found {n_correct}: {qtext!r}. Use (MC) for multi-answer questions.", ) if question["type"] == "many_choice" and n_correct <= 1: return f"Many-choice (MC) question has {n_correct} correct answer(s): {qtext!r}. Consider (SC) for single-answer questions." return None
def redact_answer_key(questions: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """
    Produce a copy of ``questions`` that no longer contains the answer key.

    The input is deep-copied first and never modified. What is removed
    depends on each question's ``type``:

    - ``multiple_choice`` / ``many_choice``: ``correct`` is dropped from
      every answer; all other answer fields are kept.
    - ``numeric``: ``value``, ``range`` and ``correct`` are dropped from
      every answer. The answer's ``type`` tag is dropped as well unless
      it is ``"default"``, so no trace of a value/range match remains.
    - ``string``: ``answers`` is replaced by an empty list.
    - any other type: left as copied.

    Per-answer ``feedback`` strings are kept on purpose.

    Parameters
    ----------
    questions : list[dict]
        Parsed question dicts (typically ``Quiz.questions``).

    Returns
    -------
    list[dict]
        Independent deep copy with the answer-key fields removed.
    """
    sanitized = copy.deepcopy(questions)
    choice_types = ("multiple_choice", "many_choice")
    numeric_secrets = ("value", "range", "correct")

    for entry in sanitized:
        kind = entry.get("type")

        if kind == "string":
            entry["answers"] = []
            continue

        if kind in choice_types:
            for ans in entry.get("answers", []):
                ans.pop("correct", None)
        elif kind == "numeric":
            for ans in entry.get("answers", []):
                for key in numeric_secrets:
                    ans.pop(key, None)
                # A leftover "value"/"range" tag would reveal that a
                # match rule existed here; only "default" is retained.
                if ans.get("type") != "default":
                    ans.pop("type", None)

    return sanitized
def find_quiz_regions(
    source: str,
    begin_quiz_delimiter: str = "#### Quiz",
    end_quiz_delimiter: str = "#### End Quiz",
) -> tuple[list[tuple[str, list[str]]], list[str]]:
    """
    Separate the lines of a cell into quiz regions and everything else.

    A line whose stripped text starts with ``begin_quiz_delimiter``
    opens a region; the rest of that stripped line is the region's
    options header. A line whose stripped text starts with
    ``end_quiz_delimiter`` closes the region. Delimiter lines themselves
    are not part of either output.

    Parameters
    ----------
    source : str
        Complete source text of the markdown cell (split on ``"\\n"``).
    begin_quiz_delimiter : str, optional
        Marker that starts a quiz region.
    end_quiz_delimiter : str, optional
        Marker that ends a quiz region.

    Returns
    -------
    quizzes : list[tuple[str, list[str]]]
        ``(options_header, quiz_lines)`` for each region, in order.
    remaining_lines : list[str]
        Lines located outside every quiz region.

    Raises
    ------
    RuntimeError
        If a region is opened inside another one, closed without
        having been opened, or still open at the end of the cell.
    """
    regions: list[tuple[str, list[str]]] = []
    outside: list[str] = []
    header = ""
    body: list[str] = []
    inside = False

    for raw in source.split("\n"):
        text = raw.strip()

        if text.startswith(begin_quiz_delimiter):
            if inside:
                raise RuntimeError("Encountered nested quiz delimiters")
            inside = True
            header = text.removeprefix(begin_quiz_delimiter)
            body = []
            continue

        if text.startswith(end_quiz_delimiter):
            if not inside:
                raise RuntimeError("Encountered quiz end without beginning")
            inside = False
            regions.append((header, body))
            continue

        # Ordinary line: it belongs to the open region, if there is one.
        target = body if inside else outside
        target.append(raw)

    if inside:
        raise RuntimeError(f"Cell ended without {end_quiz_delimiter = }")
    return regions, outside
def _open_delim(text: str) -> str | None: r""" Return the delimiter currently open at the end of ``text``, or ``None`` if balanced. Used by :func:`split_questions` to decide whether a multi-line field has been completed. Recognises the same delimiters :func:`_scan_field` does: triple-backtick code blocks, same-character ``"..."`` (with ``\\`` and ``\"`` escapes), and paired ``(...)``, ``[...]``, ``{...}``, ``<...>`` with depth tracking on the field's own pair (other delimiter characters inside are inert). Parameters ---------- text : str A buffer of accumulated quiz source — typically the ``\n``-joined physical lines of an in-progress logical line. Returns ------- str or None The opening delimiter still awaiting its closer, or ``None`` if the buffer is balanced. Useful for both flow control and error messages. """ pairs = {"(": ")", "[": "]", "{": "}", "<": ">"} i = 0 n = len(text) while i < n: if text.startswith("```", i): j = _skip_code_block(text, i) if j is None: return "```" i = j elif text[i] == '"': j = _skip_quoted(text, i) if j is None: return '"' i = j elif text[i] in pairs: j = _skip_paired(text, i, text[i], pairs[text[i]]) if j is None: return text[i] i = j else: i += 1 return None def _skip_code_block(text: str, start: int) -> int | None: """ Skip past a triple-backtick code block in :func:`_open_delim`. Parameters ---------- text : str Buffer being scanned. start : int Index of the opening triple-backtick in ``text``. Returns ------- int or None Index just past the closing triple-backtick, or ``None`` if the block is unterminated. """ j = start + 3 n = len(text) while j < n and not text.startswith("```", j): j += 1 return None if j >= n else j + 3 def _skip_quoted(text: str, start: int) -> int | None: r""" Skip past a same-character ``"..."`` field in :func:`_open_delim`. Recognises ``\\`` and ``\"`` as escape sequences. Parameters ---------- text : str Buffer being scanned. start : int Index of the opening ``"`` in ``text``. 
Returns ------- int or None Index just past the closing ``"``, or ``None`` if the field is unterminated. """ j = start + 1 n = len(text) while j < n: if text[j] == "\\" and j + 1 < n and text[j + 1] in '\\"': j += 2 continue if text[j] == '"': return j + 1 j += 1 return None def _skip_paired(text: str, start: int, left: str, right: str) -> int | None: r""" Skip past a paired delimited field in :func:`_open_delim`. Tracks depth on the field's own ``left``/``right`` pair; other delimiter characters are inert. Recognises ``\<left>``, ``\<right>``, and ``\\`` as escape sequences. Parameters ---------- text : str Buffer being scanned. start : int Index of the opening ``left`` in ``text``. left : str Opening delimiter character. right : str Closing delimiter character. Returns ------- int or None Index just past the matching closing delimiter, or ``None`` if the field is unterminated. """ depth = 1 j = start + 1 n = len(text) while j < n and depth > 0: if text[j] == "\\" and j + 1 < n and text[j + 1] in (left, right, "\\"): j += 2 continue if text[j] == left: depth += 1 elif text[j] == right: depth -= 1 j += 1 return None if depth > 0 else j
def split_questions(quiz_source: list[str]) -> list[list[str]]:
    r"""
    Group the physical lines of a quiz region into per-question blocks.

    Every block is a list of *logical* lines: the question line first,
    then one logical line per answer. A logical line can cover several
    physical lines, which are joined with ``\n``.

    - A question starts with ``* `` at column 0.
    - An answer starts with ``+`` or ``-`` at column 2, and only counts
      once a question has been opened.
    - A physical line continues the active logical line when its first
      non-whitespace column is strictly greater than the opener's
      column (0 for questions, 2 for answers).
    - While a triple-backtick block is open in the active logical line,
      every physical line is appended regardless of indentation.
    - Blank lines are appended to the active logical line, or dropped
      when there is none.
    - Any other line closes the active logical line and is ignored.

    Parameters
    ----------
    quiz_source : list[str]
        Physical lines located inside a quiz region.

    Returns
    -------
    list[list[str]]
        One inner list per question: the question's logical line
        followed by its answers' logical lines.

    Raises
    ------
    ParseError
        If a logical line is closed while one of its delimited fields
        is still open.
    """
    blocks: list[list[str]] = []
    active_question: list[str] | None = None
    pending: list[str] | None = None
    base_indent: int | None = None

    def close_pending() -> None:
        """Finish the logical line being assembled, checking its delimiters."""
        nonlocal pending, base_indent
        if pending is None:
            return
        joined = "\n".join(pending)
        dangling = _open_delim(joined)
        if dangling is not None:
            raise ParseError(
                f"Unterminated {dangling!r} delimiter in quiz logical line: {joined!r}",
            )
        # A logical line is only ever opened once a question exists.
        if active_question is None:  # pragma: no cover
            raise ParseError("internal: flush_logical called without active question")
        active_question.append(joined)
        pending = None
        base_indent = None

    for raw in quiz_source:
        content = raw.lstrip()
        indent = len(raw) - len(content)

        # An open code fence swallows the line whatever its indentation.
        if pending is not None and _open_delim("\n".join(pending)) == "```":
            pending.append(raw)
            continue

        # Blank line: kept inside an active logical line, dropped otherwise.
        if not content:
            if pending is not None:
                pending.append(raw)
            continue

        # Question opener.
        if indent == 0 and content.startswith("* "):
            close_pending()
            if active_question is not None:
                blocks.append(active_question)
            active_question = []
            pending = [raw]
            base_indent = 0
            continue

        # Answer opener (needs a question to attach to).
        if indent == 2 and content.startswith(("+", "-")) and active_question is not None:
            close_pending()
            pending = [raw]
            base_indent = 2
            continue

        # Continuation: indented deeper than the opener of the active line.
        if pending is not None and base_indent is not None and indent > base_indent:
            pending.append(raw)
            continue

        # Anything else is outside content; it ends the active line.
        close_pending()

    close_pending()
    if active_question is not None:
        blocks.append(active_question)
    return blocks
def parse_quiz_options(header: str) -> dict[str, Any]:
    """
    Parse the quiz options written after the begin delimiter.

    The header is split on whitespace into ``key=value`` tokens:

    - ``filename=<text>`` stores the text as-is.
    - ``encoded``, ``inline``, ``hidden``, ``hide_correctness`` and
      ``graded`` accept ``true`` / ``false`` (case-insensitive).
    - Tokens without ``=``, unknown keys, and boolean options with any
      other value are ignored, so the result always has exactly the six
      documented keys.

    Parameters
    ----------
    header : str
        Text that follows the begin delimiter on the same line.

    Returns
    -------
    dict
        Options with keys ``encoded``, ``inline``, ``hidden``,
        ``filename``, ``hide_correctness``, ``graded``. Defaults are
        ``True`` for the first three and ``None`` for the rest; ``None``
        means "not specified" and its interpretation is left to the
        caller.
    """
    options: dict[str, Any] = {
        "encoded": True,
        "inline": True,
        "hidden": True,
        "filename": None,
        "hide_correctness": None,
        "graded": None,
    }
    # Only these keys may be set from a true/false token; anything else
    # (typos, an empty key from "=true") must not leak into the result.
    boolean_keys = frozenset(options) - {"filename"}
    literals = {"true": True, "false": False}

    for token in header.split():
        key, separator, raw = token.partition("=")
        if not separator:
            continue
        if key == "filename":
            options["filename"] = raw
        elif key in boolean_keys:
            flag = literals.get(raw.lower())
            if flag is not None:
                options[key] = flag
    return options
def parse_question(lines: list[str]) -> dict[str, Any]:
    """
    Build a question dict from one question block.

    Parameters
    ----------
    lines : list[str]
        The question's logical line followed by its answer lines.

    Returns
    -------
    dict
        The dict produced by :func:`line_to_question`, with an
        ``answers`` list added. Answers are parsed with
        :func:`line_to_numeric_answer` for ``numeric`` questions and
        with :func:`line_to_mc_answer` for every other type.

    Raises
    ------
    ParseError
        If the block is empty or the question line carries no
        ``(TYPE)`` marker.
    """
    if not lines:
        raise ParseError("Question block is empty.")

    head, *answer_lines = lines
    question = line_to_question(head)

    # Without this guard a missing ``(TYPE)`` marker surfaced as a bare
    # KeyError instead of the module's own parse error.
    if "type" not in question:
        raise ParseError(f"Question is missing its (TYPE) marker: {head!r}")

    if question["type"] == "numeric":
        to_answer = line_to_numeric_answer
    else:
        to_answer = line_to_mc_answer
    question["answers"] = [to_answer(entry) for entry in answer_lines]
    return question
def _scan_field(s: str, left: str, right: str) -> tuple[str, str]: r""" Extract a delimited field's content from ``s``, returning the content and remainder. The string ``s`` must start with ``left``. Walks forward to find the matching closing delimiter and returns the captured inner content plus whatever remains after the closing delimiter. The exact tokenisation rule depends on the delimiters: - **Paired delimiters** (``left != right``, e.g. ``"("`` / ``")"``) track depth on the field's *own* pair only. A nested ``left`` increments depth; a ``right`` at depth 0 closes the field. Other delimiter characters inside (``{``, ``[``, ``"``, etc.) are inert text. This lets ``(Correct (with caveats))`` parse as one feedback field with content ``Correct (with caveats)``, and ``(feedback { )`` parse as ``feedback { `` (the unmatched ``{`` is just content). - **Same-character delimiters** (``left == right``, e.g. ``"\""`` / ``"\""``) recognise backslash escapes: ``\\`` is a literal ``\``, and ``\<right>`` is a literal of the closing delimiter character. Other backslash sequences (``\int``, ``\alpha``, etc.) pass through unchanged so LaTeX content survives. - **Multi-character delimiters** (e.g. triple-backtick ``"```"``) are treated like same-character delimiters but without escape support — the scanner walks character-by-character looking for the literal closing sequence. ``"``, ``(``, ``)``, ``\``, etc. inside are all inert. This matches markdown's fenced-code semantics. Parameters ---------- s : str Input string starting with ``left``. left : str Opening delimiter. May be one or more characters. right : str Closing delimiter. May be one or more characters. Returns ------- extracted : str The content between the opening and closing delimiters (exclusive on both sides), with escapes resolved. remainder : str Whatever follows the closing delimiter in ``s``. Raises ------ ParseError If the closing delimiter is never found. 
""" if not s.startswith(left): raise ParseError(f"_scan_field called on string not starting with {left!r}: {s!r}") i = len(left) n = len(s) paired = left != right and len(left) == 1 and len(right) == 1 same_char_single = left == right and len(left) == 1 depth = 1 # opening `left` already consumed out_chars: list[str] = [] while i < n: ch = s[i] # Single-char delimiters honour backslash escapes. ``\<right>`` # and ``\\`` are unescaped; ``\<left>`` is also unescaped on # paired delimiters so an unmatched ``(`` (e.g. an emoticon # ``:(`` inside feedback) can be written as ``\(``. Any other # ``\X`` passes through unchanged so LaTeX commands (``\int``, # ``\alpha``) survive. if (paired or same_char_single) and ch == "\\" and i + 1 < n: nxt = s[i + 1] if nxt == right or nxt == "\\" or (paired and nxt == left): out_chars.append(nxt) i += 2 continue # fall through: literal backslash, then handle nxt next iteration # Paired delimiters: track depth on `left`/`right`. if paired: if ch == left: depth += 1 out_chars.append(ch) i += 1 continue if ch == right: depth -= 1 if depth == 0: return "".join(out_chars), s[i + 1 :] out_chars.append(ch) i += 1 continue out_chars.append(ch) i += 1 continue # Multi-character or same-character delimiters: look for the # literal closing sequence at the current position. if s.startswith(right, i): return "".join(out_chars), s[i + len(right) :] out_chars.append(ch) i += 1 raise ParseError(f"Unterminated field: missing closing {right!r} after {left!r}: {s!r}")
def parse_line(line: str, **components: tuple[str, str, Any]) -> dict[str, Any]:
    r"""
    Split a line into its delimited components and convert each one.

    The line is consumed from the left. At every step surrounding
    whitespace is stripped and the first component (in keyword order)
    whose opening delimiter matches the start of the remaining text is
    extracted and passed through its typecast.

    Parameters
    ----------
    line : str
        Text to parse.
    \*\*components : tuple[str, str, Any]
        Component name mapped to a ``(left_delim, right_delim,
        typecast)`` triple.

    Returns
    -------
    dict
        Converted value for every component found, keyed by name.

    Raises
    ------
    ParseError
        If a component occurs twice, the remaining text starts with
        none of the opening delimiters, or a field is never closed.
    """
    found: dict[str, Any] = {}
    rest = line.strip()
    while rest:
        for name, (left, right, cast) in components.items():
            if not rest.startswith(left):
                continue
            if name in found:
                raise ParseError(f"Duplicate component {name} found.")
            body, rest = _scan_field(rest, left, right)
            found[name] = cast(body)
            break
        else:
            # No opener matched what is left of the line.
            raise ParseError(f"Non-parsable component found. Left to parse: {rest!r}")
        rest = rest.strip()
    return found
def _normalise_code_block(code: str) -> str: r""" Normalise a captured code-block string for downstream display. Two transformations are applied: - Literal ``\n`` (the two-character sequence backslash + ``n``) is replaced with a real newline. This preserves the v0.4.x authoring convention of writing single-line code blocks with embedded ``\n`` markers. - Leading and trailing newline whitespace is stripped. Multi-line fenced code (`` \`\`\` ``-on-its-own-line opening / closing) naturally surrounds its content with newlines; users expect those to vanish, matching markdown's fenced-code semantics. Parameters ---------- code : str Raw captured content from the ``\`\`\`...\`\`\``` field. Returns ------- str Normalised code suitable for display. """ return code.replace(r"\n", "\n").strip("\n")
def line_to_question(line: str) -> dict[str, Any]:
    """
    Parse a question line into a question dict that has no answers yet.

    Leading spaces and ``*`` characters are removed, then the rest is
    parsed as a sequence of delimited components:

    - ``(XX)``: question type; ``NM``, ``SC`` and ``MC`` map to
      ``numeric``, ``multiple_choice`` and ``many_choice``, any other
      code maps to ``None``.
    - ``"..."``: question text.
    - a triple-backtick field: code, normalised for display.
    - ``[N]``: ``precision`` as ``int``.
    - ``<N>``: ``answer_cols`` as ``int``.
    - ``{N}``: ``points``.

    Parameters
    ----------
    line : str
        Question line starting with ``*``.

    Returns
    -------
    dict
        The components that were present on the line, keyed by name.
    """
    type_names = {"NM": "numeric", "SC": "multiple_choice", "MC": "many_choice"}

    def _lookup_type(code: str) -> str | None:
        """Translate a two-letter type code; unknown codes give ``None``."""
        return type_names.get(code)

    def _parse_points(raw: str) -> int | float:
        """
        Convert the content of a ``{N}`` marker to a number.

        Parameters
        ----------
        raw : str
            Text between ``{`` and ``}``.

        Returns
        -------
        int or float
            ``int`` when the value is a whole number (``{3}``),
            otherwise ``float`` (``{0.5}``).
        """
        number = float(raw)
        if number.is_integer():
            return int(number)
        return number

    # Insertion order is the order in which openers are tried.
    spec: dict[str, tuple[str, str, Any]] = {
        "type": ("(", ")", _lookup_type),
        "question": ('"', '"', str),
        "code": ("```", "```", _normalise_code_block),
        "precision": ("[", "]", int),
        "answer_cols": ("<", ">", int),
        "points": ("{", "}", _parse_points),
    }
    body = line.lstrip(" *")
    return parse_line(body, **spec)
def line_to_numeric_answer(line: str) -> dict[str, Any]:
    """
    Parse one answer line of a numeric question.

    The line may contain ``(feedback)``, ``<value>`` and ``[low, high]``
    components. The answer's ``type`` is ``"value"`` or ``"range"``
    depending on which of the two is present, and ``"default"`` when
    neither is.

    Parameters
    ----------
    line : str
        Answer line starting with ``+`` (correct) or ``-`` (incorrect).

    Returns
    -------
    dict
        Answer dict with ``correct``, the parsed components, and
        ``type``.

    Raises
    ------
    ParseError
        If the line specifies both a value and a range.
    """

    def _parse_range(raw: str) -> list[float]:
        """Split on the first comma and convert each part to ``float``."""
        return [float(part) for part in raw.split(",", maxsplit=1)]

    text = line.strip()
    is_correct = text.startswith("+")
    # Drop the +/- marker together with the blanks around it.
    text = text.lstrip("-+ ")

    spec: dict[str, tuple[str, str, Any]] = {
        "feedback": ("(", ")", str),
        "value": ("<", ">", float),
        "range": ("[", "]", _parse_range),
    }
    answer: dict[str, Any] = {"correct": is_correct, **parse_line(text, **spec)}

    has_value = "value" in answer
    has_range = "range" in answer
    if has_value and has_range:
        raise ParseError(f"Answer to numeric question has both value and range: {text!r}")

    if has_value:
        answer["type"] = "value"
    elif has_range:
        answer["type"] = "range"
    else:
        answer["type"] = "default"
    return answer
def line_to_mc_answer(line: str) -> dict[str, Any]:
    """
    Parse one answer line of a single- or many-choice question.

    The line may contain ``(feedback)``, ``"answer text"`` and a
    triple-backtick code component.

    Parameters
    ----------
    line : str
        Answer line starting with ``+`` (correct) or ``-`` (incorrect).

    Returns
    -------
    dict
        Answer dict with ``correct`` plus whichever of ``feedback``,
        ``answer`` and ``code`` were present.
    """
    text = line.lstrip()
    is_correct = text.startswith("+")
    # Drop the +/- marker together with the blanks around it.
    text = text.lstrip("-+ ")

    spec: dict[str, tuple[str, str, Any]] = {
        "feedback": ("(", ")", str),
        "answer": ('"', '"', str),
        "code": ("```", "```", _normalise_code_block),
    }
    return {"correct": is_correct, **parse_line(text, **spec)}