Source code for djenerator.generate_test_data

#!/usr/bin/env python
"""
This file generates random test data from given sample data for
given models.
"""
import inspect
import os
import random
from django.db.models import Model
from fields_generator import generate_random_values
from model_reader import is_auto_field
from model_reader import is_related
from model_reader import is_required
from model_reader import is_reverse_related
from model_reader import list_of_fields
from model_reader import list_of_models
from model_reader import module_import
from model_reader import relation_type
from utility import sort_unique_tuples
from utility import unique_items


def field_sample_values(field):
    """
    Retrieves the list of sample values for a given field.

    :param DjangoField field: A reference to the class of the field.
    :rtype: List
    :returns: A list of sample values for the given field.
    """
    list_field_values = []
    if not is_auto_field(field):
        if is_reverse_related(field):
            # TODO(mostafa-mahmoud): Check if this case needs to be handled.
            pass
        elif is_related(field):
            model = field.rel.to
            list_field_values = list(model.objects.all())
            if 'ManyToMany' in relation_type(field) and list_field_values:
                siz = random.randint(1, len(list_field_values))
                list_field_values = [random.sample(list_field_values, siz)]
        else:
            found = False
            if hasattr(field.model, 'TestData'):
                model = field.model
                while (model.__base__ != Model
                       and not hasattr(model.TestData, field.name)):
                    model = model.__base__
                if field.name in model.TestData.__dict__.keys():
                    found = True
                    input_method = model.TestData.__dict__[field.name]
                    if isinstance(input_method, str):
                        app_name = field.model._meta.app_label
                        path = '%s/TestTemplates/%s' % (app_name, input_method)
                        input_file = open(path, 'r')
                        list_field_values = [word[:-1] for word in input_file]
                    elif (isinstance(input_method, list)
                          or isinstance(input_method, tuple)):
                        list_field_values = input_method
                    else:
                        if inspect.isfunction(input_method):
                            list_field_values = input_method()
            if not found:
                app_name = field.model._meta.app_label
                path = '%s/TestTemplates/sample__%s__%s' % (app_name,
                                                            field.model.__name__,
                                                            field.name)
                if os.path.exists(path):
                    input_file = open(path, 'r')
                    list_field_values = [word[:-1] for word in input_file]
                else:
                    list_field_values = generate_random_values(field)
    return list(list_field_values)

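# Illustrative sketch (not part of this module; 'Author', its fields, and the
# file name are hypothetical): field_sample_values looks for sample data in a
# 'TestData' inner class, where each entry may be a file name under
# '<app>/TestTemplates/' (one value per line), a literal list or tuple, or a
# function returning the values. Without a TestData entry, it falls back to
# the file '<app>/TestTemplates/sample__<ModelName>__<field_name>', and
# finally to generate_random_values.
#
#     class Author(models.Model):
#         name = models.CharField(max_length=64)
#         age = models.PositiveIntegerField()
#
#         class TestData:
#             name = 'sample_author_names'  # file under <app>/TestTemplates/
#             age = [23, 35, 47]            # literal sample values
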
def dfs(instances, cur_tuple, index, to_be_computed, constraints, model,
        to_be_shuffled):
    """
    Value generator for the fields of a given model, simulating a depth-first
    search. The generated models are saved in a (temporary) database.

    The interface of a constraint predicate should be:

        boolean predicate(cur_tuple, model, field)

    - cur_tuple: A list of pairs of the values filled so far, in the format
      (str:field_name, field_value).
    - model: A reference to the class of the given model.
    - field: A reference to the class of the field being generated.

    The predicate should handle the fact that the given tuple might not be
    full, may rely on the previously generated models being stored in the
    temporary database, and should return True only if the required
    constraint is satisfied.

    :param int instances:
        The target number of generated instances of the model.
    :param cur_tuple:
        A list of pairs (str:field_name, field_value) of the values of the
        filled fields.
    :type cur_tuple: List(pair(str, .))
    :param int index:
        The index of the field being filled in the list of fields.
    :param List to_be_computed:
        A list used for accumulation of the ignored fields.
    :param List constraints:
        A list of predicate functions that constrain the output.
    :param DjangoModel model: A reference to the class of the given model.
    :param boolean to_be_shuffled:
        A boolean that determines whether the sample data will be shuffled
        or not.
    :rtype: None
    """
    fields = list_of_fields(model)
    if index >= len(fields):
        dfs.total += 1
        create_model(model, cur_tuple)
        return 1
    else:
        list_field_values = field_sample_values(fields[index])
        if not list_field_values:
            many_to_many_related = (is_related(fields[index]) and
                                    'ManyToMany' in relation_type(fields[index]))
            optional_field = not is_required(fields[index])
            auto_fld = is_auto_field(fields[index])
            if many_to_many_related or optional_field or auto_fld:
                if not is_auto_field(fields[index]):
                    to_be_computed.append(fields[index])
                return dfs(instances, cur_tuple, index + 1, to_be_computed,
                           constraints, model, to_be_shuffled)
        else:
            if to_be_shuffled:
                random.shuffle(list_field_values)
            instances_so_far = 0
            for field_id, nxt_field in enumerate(list_field_values):
                new_tuple = cur_tuple[:]
                new_tuple.append((fields[index].name, nxt_field))
                are_constraints_satisfied = True
                for cons in constraints:
                    if not cons(new_tuple, model, fields[index]):
                        are_constraints_satisfied = False
                        break
                if are_constraints_satisfied:
                    instances_remaining = instances - instances_so_far
                    remaining_values = len(list_field_values) - field_id
                    value_instances = ((instances_remaining - 1 + remaining_values)
                                       / remaining_values)
                    new_instances = dfs(value_instances, new_tuple, index + 1,
                                        to_be_computed, constraints, model,
                                        to_be_shuffled)
                    instances_so_far += new_instances
                    if instances_so_far >= instances or dfs.total >= dfs.size:
                        return instances_so_far
            return instances_so_far

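# Illustrative example of the predicate interface documented above (the field
# names 'start_date' and 'end_date' are hypothetical). Such a predicate would
# typically be listed in a model's 'Constraints.constraints' attribute, which
# generate_model (below) passes to dfs alongside the uniqueness constraints.
def _example_constraint(cur_tuple, model, field):
    values = dict(cur_tuple)
    # cur_tuple may be partial, so only check once both fields are filled.
    if 'start_date' in values and 'end_date' in values:
        return values['start_date'] <= values['end_date']
    return True
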
def generate_model(model, size, shuffle=None):
    """
    Generates 'size' sample instances of a given model and stores them in a
    temporary database.

    :param DjangoModel model: A reference to the class of the given model.
    :param int size: The number of sample models to be generated.
    :param boolean shuffle:
        A boolean to decide whether the sample input will be shuffled or not.
        Shuffles by default.
    :rtype: tuple
    :returns:
        A tuple that contains a reference to the class of the given model,
        and a list of the fields that were not computed.
    """
    unique_fields = [(field.name,) for field in list_of_fields(model)
                     if (hasattr(field, 'unique') and field.unique
                         and not is_auto_field(field))]
    unique_together = []
    if hasattr(model._meta, 'unique_together'):
        unique_together = list(model._meta.unique_together)
    unique = unique_together + unique_fields
    unique = sort_unique_tuples(unique, model)
    unique_constraints = [unique_items(un_tuple) for un_tuple in unique]
    constraints = []
    if hasattr(model, 'Constraints'):
        constraints = model.Constraints.constraints
    constraints += unique_constraints
    if shuffle is None:
        shuffle = True
    to_be_computed = []
    dfs.size = size
    dfs.total = 0
    dfs(size, [], 0, to_be_computed, constraints, model, shuffle)
    return model, to_be_computed

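# Illustrative usage (the 'Author' model is hypothetical): fill the temporary
# database with up to 50 Author instances; fields that could not be filled
# yet (optional or many-to-many fields without sample values) are returned
# for a later pass with recompute (defined further below).
#
#     model, skipped_fields = generate_model(Author, 50)
#     for fld in skipped_fields:
#         recompute(model, fld)
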
def create_model(model, val):
    """
    Creates a new model instance given a reference to its class and a list of
    the values of its fields.

    :param DjangoModel model:
        A reference to the class of the model that will be created.
    :param val: A list of pairs in the format (field name, field value).
    :type val: tuple(pair(str, .))
    :returns: A model instance with the given values.
    """
    vals_dictionary = dict(val)
    have_many_to_many_relation = any(x for x in list_of_fields(model)
                                     if (is_related(x)
                                         and 'ManyToMany' in relation_type(x)))
    if not have_many_to_many_relation:
        mdl = model(**vals_dictionary)
        mdl.save()
        return mdl
    else:
        mdl = model()
        flds = list_of_fields(model)
        dict_T = {}
        for field in flds:
            dict_T[field.name] = relation_type(field)
        for key, val in vals_dictionary.items():
            if 'ManyToMany' not in dict_T[key]:
                setattr(mdl, key, val)
        mdl.save()
        for key, val in vals_dictionary.items():
            if 'ManyToMany' in dict_T[key]:
                for x in val:
                    getattr(mdl, key).add(x)
        mdl.save()
        return mdl

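# Illustrative example (the 'Book' model and its fields are hypothetical):
# plain fields are passed to the model constructor, while many-to-many values
# are added only after the instance has been saved and has a primary key.
#
#     book = create_model(Book, [('title', 'Dune'),
#                                ('authors', [author1, author2])])
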
def dependencies(model):
    """
    Retrieves the models that must be generated before a given model.

    :param DjangoModel model: A reference to the class of the given model.
    :rtype: List
    :returns: A list of references to the classes of the models.
    """
    fields = list_of_fields(model)
    return [field.rel.to for field in fields
            if (is_required(field) and is_related(field)
                and 'ManyToMany' not in relation_type(field))]

def topological_sort(models):
    """
    Sorts a given list of models according to the dependencies of the
    relations between the models.

    :param List models:
        A list of references to the classes of the given models.
    :rtype: List
    :returns: A list of references to the classes of the given models.
    """
    result = []
    visited = []
    S = filter(dependencies, models)

    def visit(model):
        if model not in visited:
            visited.append(model)
            for dep_model in dependencies(model):
                visit(dep_model)
            result.append(model)

    while S:
        model = S.pop(0)
        visit(model)
    result_singleton = []
    for model in models:
        if model not in result:
            result_singleton.append(model)
    return result_singleton + result

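# Illustrative example (the models are hypothetical): only required,
# non-many-to-many relations count as dependencies, so if Book has a required
# ForeignKey to Author while Tag has no relations, Author is placed before
# Book in the generation order.
#
#     order = topological_sort([Book, Author, Tag])
#     # order == [Tag, Author, Book]
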
def recompute(model, field):
    """
    Recomputes the previously ignored fields in the models.

    :param DjangoModel model: A reference to the class of the given model.
    :param DjangoField field:
        A reference to the class of the non-computed field.
    :rtype: None
    """
    if is_related(field):
        models = model.objects.all()
        list_field_values = field_sample_values(field)
        random.shuffle(list_field_values)
        n = len(list_field_values)
        for index, mdl in enumerate(models):
            if ('ManyToMany' in relation_type(field)
                    and not getattr(mdl, field.name).exists()
                    or not is_required(field) and not getattr(mdl, field.name)):
                setattr(mdl, field.name, list_field_values[index % n])
                mdl.save()

def generate_test_data(app_models, size, **size_options):
    """
    Generates 'size' random instances for each model in the models module at
    the given path. If the sample data is not enough for generating 'size'
    models, then all of the sample data will be used. If the models are
    inconsistent, then no data will be generated. The data will be stored in
    a temporary database used for generation.

    :param str app_models:
        A string that contains the path of the models module.
    :param int size:
        An integer that specifies the size of the generated data.
    :param dict size_options:
        A dictionary that maps a str:model_name to an int:model_size, used as
        the size of the generated models. If a model is not in size_options,
        then the default value 'size' will be used.
    :rtype: None
    """
    models = topological_sort(list_of_models(module_import(app_models)))
    to_be_computed = [generate_model(model,
                                     (model.__name__ in size_options.keys()
                                      and size_options[model.__name__]) or size,
                                     True)
                      for model in models]
    precomp = set([])
    for mdl, flds in to_be_computed:
        for fld in flds:
            if (mdl, fld.name) not in precomp:
                precomp.add((mdl, fld.name))
                recompute(mdl, fld)
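
# Illustrative usage (the module path and model name are hypothetical):
# generate roughly 20 instances per model in 'myapp.models', but only 5 for
# the model named 'Author'.
#
#     generate_test_data('myapp.models', 20, Author=5)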