Skip to content

Commit 7b9f56a

Browse files
committed
MDL-83541 question/quiz: restoring questions with identical stamps
Historically it was possible, through a series of question restores, moves and edits, to end up with multiple questions in the same category with the same stamp, but differences in other question or answer fields. This, combined with changes in versions, led to errors when restoring or duplicating quizzes using these questions. While recent changes have made it impossible to create this situation in current Moodle versions, as any edits will create a new question version with a new stamp, this situation may exist on long-standing Moodle sites which have been upgraded since pre-4.0. This change performs a much wider-ranging comparison of restored existing questions, generating a hash of all the data for a question in a backup file, and a corresponding hash for each question in the target category, to decide if a restored question matches a question already in the database.
1 parent f91e3d4 commit 7b9f56a

File tree

6 files changed

+938
-15
lines changed

6 files changed

+938
-15
lines changed
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
issueNumber: MDL-83541
2+
notes:
3+
core_question:
4+
- message: >
5+
Duplication or multiple restores of questions has been modified to avoid
6+
errors where a question with the same stamp already exists in the target
7+
category.
8+
9+
To achieve this all data for the question is hashed, excluding any ID
10+
fields.
11+
12+
If a qtype plugin calls any `$this->add_question_*()` methods in its
13+
`restore_qtype_*_plugin::define_question_plugin_structure()` method, the
14+
ID fields used in these records will be excluded automatically.
15+
16+
If a qtype plugin defines its own tables with ID fields, it must define
17+
`restore_qtype_*_plugin::define_excluded_identity_hash_fields()` to return
18+
an array of paths to these fields within the question data. This should be
19+
all that is required for the majority of plugins.
20+
See the PHPDoc of `restore_qtype_plugin::define_excluded_identity_hash_fields()`
21+
for a full explanation of how these paths should be defined, and
22+
`restore_qtype_truefalse_plugin` for an example.
23+
24+
If the data structure for a qtype returned by calling
25+
`get_question_options()` contains data other than ID fields that are not
26+
contained in the backup structure or vice-versa, it will need to
27+
override `restore_qtype_*_plugin::remove_excluded_question_data()`
28+
to remove the inconsistent data. See `restore_qtype_multianswer_plugin` as
29+
an example.
30+
type: fixed

backup/moodle2/restore_qtype_plugin.class.php

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ abstract class restore_qtype_plugin extends restore_plugin {
4747
*/
4848
private $questionanswercacheid = null;
4949

50+
/**
51+
* @var array List of fields to exclude from hashing during restore.
52+
*/
53+
protected array $excludedhashfields = [];
54+
5055
/**
5156
* Add to $paths the restore_path_elements needed
5257
* to handle question_answers for a given question
@@ -62,6 +67,10 @@ protected function add_question_question_answers(&$paths) {
6267
$elename = 'question_answer';
6368
$elepath = $this->get_pathfor('/answers/answer'); // we used get_recommended_name() so this works
6469
$paths[] = new restore_path_element($elename, $elepath);
70+
$this->exclude_identity_hash_fields([
71+
'/options/answers/id',
72+
'/options/answers/question',
73+
]);
6574
}
6675

6776
/**
@@ -78,6 +87,10 @@ protected function add_question_numerical_units(&$paths) {
7887
$elename = 'question_numerical_unit';
7988
$elepath = $this->get_pathfor('/numerical_units/numerical_unit'); // we used get_recommended_name() so this works
8089
$paths[] = new restore_path_element($elename, $elepath);
90+
$this->exclude_identity_hash_fields([
91+
'/options/units/id',
92+
'/options/units/question',
93+
]);
8194
}
8295

8396
/**
@@ -94,6 +107,7 @@ protected function add_question_numerical_options(&$paths) {
94107
$elename = 'question_numerical_option';
95108
$elepath = $this->get_pathfor('/numerical_options/numerical_option'); // we used get_recommended_name() so this works
96109
$paths[] = new restore_path_element($elename, $elepath);
110+
$this->exclude_identity_hash_fields(['/options/question']);
97111
}
98112

99113
/**
@@ -114,6 +128,21 @@ protected function add_question_datasets(&$paths) {
114128
$elename = 'question_dataset_item';
115129
$elepath = $this->get_pathfor('/dataset_definitions/dataset_definition/dataset_items/dataset_item');
116130
$paths[] = new restore_path_element($elename, $elepath);
131+
$this->exclude_identity_hash_fields([
132+
'/options/datasets/id',
133+
'/options/datasets/question',
134+
'/options/datasets/category',
135+
'/options/datasets/type',
136+
'/options/datasets/items/id',
137+
// The following fields aren't included in the backup or DB structure, but are parsed from the options field.
138+
'/options/datasets/status',
139+
'/options/datasets/distribution',
140+
'/options/datasets/minimum',
141+
'/options/datasets/maximum',
142+
'/options/datasets/decimals',
143+
// This field is set dynamically from the count of items in the dataset, it is not backed up.
144+
'/options/datasets/number_of_items',
145+
]);
117146
}
118147

119148
/**
@@ -395,4 +424,184 @@ public static function define_plugin_decode_contents() {
395424

396425
return $contents;
397426
}
427+
428+
/**
 * Append paths to the list of fields that are left out of the identity hash.
 *
 * The shared add_question_*() helpers in this class call this to register the fields they introduce.
 *
 * @param array $fields Slash-delimited paths to add to the exclusion list.
 * @return void
 */
private function exclude_identity_hash_fields(array $fields): void {
    $combined = array_merge($this->excludedhashfields, $fields);
    $this->excludedhashfields = $combined;
}
439+
440+
/**
 * Get the complete, de-duplicated list of field paths to leave out of the identity hash.
 *
 * Combines the paths registered by the common helpers with those returned by
 * {@see self::define_excluded_identity_hash_fields()}.
 *
 * @return array
 */
final public function get_excluded_identity_hash_fields(): array {
    $commonfields = $this->excludedhashfields;
    $pluginfields = $this->define_excluded_identity_hash_fields();

    return array_unique(array_merge($commonfields, $pluginfields));
}
451+
452+
/**
 * Declare plugin-specific field paths that must be stripped from questiondata before the identity hash is built.
 *
 * Common elements such as answers or numerical units register their own exclusions when the plugin uses them, so an
 * override only has to list fields peculiar to the plugin, such as foreign keys used in the plugin's own tables.
 *
 * Each path is slash-delimited and resolved from the root of the questiondata object, so the field
 * `$questiondata->options->questionid` is written as '/options/questionid'. When a segment of the path is an array,
 * the remaining segments are applied to every element of that array: given `$questiondata->options->answers[]`, the
 * path '/options/answers/id' strips 'id' from each entry of 'answers'.
 *
 * @return array List of paths. This base implementation adds none.
 */
protected function define_excluded_identity_hash_fields(): array {
    return [];
}
470+
471+
/**
 * Rebuild a questiondata-shaped object from the tags of a question in a backup file.
 *
 * The input is the hierarchical array of tags from the question's backup structure. The result is intended to match
 * what {@see get_question_options()} produces for this question type, so that both can be reduced to an identity hash
 * and compared. See https://docs.moodle.org/dev/Question_data_structures#Representation_1:_%24questiondata for a
 * description of that structure.
 *
 * This base implementation handles the common backup elements created by the add_question_*() methods in this class
 * (answers, numerical options and units, datasets), the element named after the qtype, and question hints. A question
 * type must extend this method if ::define_question_plugin_structure() adds any other elements to the backup.
 *
 * @param array $tags The array of tags from the backup.
 * @return \stdClass The questiondata object.
 */
public static function convert_backup_to_questiondata(array $tags): \stdClass {
    // Turns a list of backup records (arrays) into a list of objects.
    $toobjects = fn($records) => array_map(fn($record) => (object) $record, $records);

    // Only the scalar top-level tags are fields of the question itself; nested arrays are handled below.
    $scalartags = array_filter($tags, fn($tag) => !is_array($tag));
    $questiondata = (object) $scalartags;
    $qtype = $questiondata->qtype;

    // Everything the qtype plugin wrote to the backup lives under this one tag.
    $plugintags = $tags["plugin_qtype_{$qtype}_question"] ?? [];

    // The element named after the qtype holds the plugin's own options record.
    $questiondata->options = isset($plugintags[$qtype]) ? (object) $plugintags[$qtype][0] : new stdClass();

    if (isset($plugintags['answers'])) {
        $questiondata->options->answers = $toobjects($plugintags['answers']['answer']);
    }

    if (isset($plugintags['numerical_options'])) {
        // Numerical options are merged into the options object rather than nested beneath it.
        $numericaloption = $plugintags['numerical_options']['numerical_option'][0];
        $questiondata->options = (object) array_merge((array) $questiondata->options, $numericaloption);
    }

    if (isset($plugintags['numerical_units'])) {
        $questiondata->options->units = $toobjects($plugintags['numerical_units']['numerical_unit']);
    }

    if (isset($plugintags['dataset_definitions'])) {
        $questiondata->options->datasets = $toobjects($plugintags['dataset_definitions']['dataset_definition']);
    }

    // Rename each dataset's nested 'dataset_items' list to 'items', converting the records to objects.
    foreach ($questiondata->options->datasets ?? [] as $dataset) {
        if (!isset($dataset->dataset_items)) {
            continue;
        }
        $dataset->items = $toobjects($dataset->dataset_items['dataset_item']);
        unset($dataset->dataset_items);
    }

    if (isset($tags['question_hints'])) {
        $questiondata->hints = $toobjects($tags['question_hints']['question_hint']);
    }

    return $questiondata;
}
539+
540+
/**
 * Remove excluded fields from the questiondata structure.
 *
 * This removes fields that will not match or not be present in the question data structure produced by
 * {@see self::convert_backup_to_questiondata()} and {@see get_question_options()} (such as IDs), so that the remaining data can
 * be used to produce an identity hash for comparing the two.
 *
 * For plugins, it should be sufficient to override {@see self::define_excluded_identity_hash_fields()} with a list of paths
 * specific to the plugin type. Overriding this method is only necessary if the plugin's
 * {@see question_type::get_question_options()} method adds additional data to the question that is not included in the backup.
 *
 * The passed object is modified in place and also returned.
 *
 * @param stdClass $questiondata
 * @param array $excludefields Paths to the fields to exclude.
 * @return stdClass The $questiondata with excluded fields removed.
 */
public static function remove_excluded_question_data(stdClass $questiondata, array $excludefields = []): stdClass {
    // All questions will need to exclude 'id' (used by question and other tables), 'questionid' (used by hints and options),
    // 'createdby' and 'modifiedby' (since they won't map between sites).
    $defaultexcludes = [
        '/id',
        '/createdby',
        '/modifiedby',
        '/hints/id',
        '/hints/questionid',
        '/options/id',
        '/options/questionid',
    ];
    $excludefields = array_unique(array_merge($excludefields, $defaultexcludes));

    foreach ($excludefields as $excludefield) {
        $pathparts = explode('/', ltrim($excludefield, '/'));
        self::unset_excluded_fields($questiondata, $pathparts);
    }

    return $questiondata;
}

/**
 * Iterate through the elements of path to an excluded field, and unset the final element.
 *
 * If any of the elements in the path is an array, this is called recursively on each element in the array to unset fields
 * in each child of the array. The remainder of the path is then considered fully handled: it is never applied to the object
 * that holds the array, so a path such as '/options/datasets/type' cannot remove '/options/type' as a side effect.
 *
 * Only scalar values at the end of a path are unset. A path that is missing from the data is silently ignored.
 *
 * @param stdClass|array $data The questiondata object, or a subsection of it.
 * @param array $pathparts The remaining elements in the path to the excluded field.
 * @return void
 */
private static function unset_excluded_fields(stdClass|array $data, array $pathparts): void {
    while (!empty($pathparts)) {
        $element = array_shift($pathparts);
        if (!isset($data->{$element})) {
            // This element is not present in the data structure, nothing to unset.
            return;
        }
        if (is_object($data->{$element})) {
            // Descend into the child object and carry on with the rest of the path.
            $data = $data->{$element};
        } else if (is_array($data->{$element})) {
            foreach ($data->{$element} as $item) {
                self::unset_excluded_fields($item, $pathparts);
            }
            // The rest of the path has been applied to the array's children. Stop here, otherwise the loop would go on
            // to match the remaining path parts against the parent object and could unset a same-named field on it.
            return;
        } else if (empty($pathparts)) {
            // This is the last element of the path and it's a scalar value, unset it.
            unset($data->{$element});
        }
    }
}
398607
}

backup/tests/quiz_restore_decode_links_test.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ public function test_restore_quiz_decode_links(): void {
6464

6565
$questiondata = \question_bank::load_question_data($question->id);
6666

67+
$DB->set_field('question', 'questiontext', $CFG->wwwroot . '/mod/quiz/view.php?id=' . $quiz->cmid, ['id' => $question->id]);
68+
6769
$firstanswer = array_shift($questiondata->options->answers);
6870
$DB->set_field('question_answers', 'answer', $CFG->wwwroot . '/course/view.php?id=' . $course->id,
6971
['id' => $firstanswer->id]);
@@ -87,6 +89,7 @@ public function test_restore_quiz_decode_links(): void {
8789
$questionids = [];
8890
foreach ($quizquestions as $quizquestion) {
8991
if ($quizquestion->questionid) {
92+
$this->assertEquals($CFG->wwwroot . '/mod/quiz/view.php?id=' . $quiz->cmid, $quizquestion->questiontext);
9093
$questionids[] = $quizquestion->questionid;
9194
}
9295
}

backup/util/dbops/restore_dbops.class.php

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,7 @@ public static function precheck_categories_and_questions($restoreid, $courseid,
567567
* @return array A separate list of all error and warnings detected
568568
*/
569569
public static function prechek_precheck_qbanks_by_level($restoreid, $courseid, $userid, $samesite, $contextlevel) {
570-
global $DB;
570+
global $DB, $CFG;
571571

572572
// To return any errors and warnings found
573573
$errors = array();
@@ -669,21 +669,37 @@ public static function prechek_precheck_qbanks_by_level($restoreid, $courseid, $
669669
} else {
670670
self::set_backup_ids_record($restoreid, 'question_category', $category->id, $matchcat->id, $targetcontext->id);
671671
$questions = self::restore_get_questions($restoreid, $category->id);
672+
$transformer = self::get_backup_xml_transformer($courseid);
672673

673674
// Collect all the questions for this category into memory so we only talk to the DB once.
674-
$questioncache = $DB->get_records_sql_menu('SELECT q.stamp, q.id
675-
FROM {question} q
676-
JOIN {question_versions} qv
677-
ON qv.questionid = q.id
678-
JOIN {question_bank_entries} qbe
679-
ON qbe.id = qv.questionbankentryid
680-
JOIN {question_categories} qc
681-
ON qc.id = qbe.questioncategoryid
682-
WHERE qc.id = ?', array($matchcat->id));
675+
$recordset = $DB->get_recordset_sql(
676+
"SELECT q.*
677+
FROM {question} q
678+
JOIN {question_versions} qv ON qv.questionid = q.id
679+
JOIN {question_bank_entries} qbe ON qbe.id = qv.questionbankentryid
680+
JOIN {question_categories} qc ON qc.id = qbe.questioncategoryid
681+
WHERE qc.id = ?",
682+
[$matchcat->id],
683+
);
684+
685+
// Compute a hash of question and answer fields to differentiate between identical stamp-version questions.
686+
$questioncache = [];
687+
foreach ($recordset as $question) {
688+
$question->export_process = true; // Include all question options required for export.
689+
get_question_options($question);
690+
unset($question->export_process);
691+
// Remove some additional properties from get_question_options() that aren't included in backups
692+
// before we produce the identity hash.
693+
unset($question->categoryobject);
694+
unset($question->questioncategoryid);
695+
$cachekey = restore_questions_parser_processor::generate_question_identity_hash($question, $transformer);
696+
$questioncache[$cachekey] = $question->id;
697+
}
698+
$recordset->close();
683699

684700
foreach ($questions as $question) {
685-
if (isset($questioncache[$question->stamp])) {
686-
$matchqid = $questioncache[$question->stamp];
701+
if (isset($questioncache[$question->questionhash])) {
702+
$matchqid = $questioncache[$question->questionhash];
687703
} else {
688704
$matchqid = false;
689705
}
@@ -1918,6 +1934,22 @@ public static function delete_course_content($courseid, ?array $options = null)
19181934
private static function password_should_be_discarded(#[\SensitiveParameter] string $password): bool {
19191935
return (bool) preg_match('/^[0-9a-f]{32}$/', $password);
19201936
}
1937+
1938+
/**
 * Build a backup XML transformer for a course, including the backup libraries first.
 *
 * A process that is only running a restore may not have loaded the backup classes yet,
 * so they are required here before the transformer is instantiated.
 *
 * @param int $courseid
 * @return backup_xml_transformer
 */
protected static function get_backup_xml_transformer(int $courseid): backup_xml_transformer {
    global $CFG;

    $dirroot = $CFG->dirroot;
    require_once($dirroot . '/backup/util/includes/backup_includes.php');
    require_once($dirroot . '/backup/moodle2/backup_plan_builder.class.php');

    $transformer = new backup_xml_transformer($courseid);
    return $transformer;
}
19211953
}
19221954

19231955
/*

0 commit comments

Comments
 (0)