Source code for wingbeats.modelling.builds

"""Library for factory functions to define model architectures within the Functional API"""
  


# Import libraries
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Lambda, Softmax
from tensorflow.keras import Input, regularizers

from wingbeats.modelling.layers import DenseBlock, L2_Norm, Identity



def build_embedder(in_shape, out_shape, f_extractor, reg_param=1e-3,
                   input_name="input_signal", model_name="Embedder",
                   training=None):
    """Assemble a model that learns hierarchical class embeddings.

    Architecture::

        f_extractor(Layer) + Dense + L2

    The model has a single output: the predicted embedding.

    :param in_shape: Input shape, without the batch dimension.
    :type in_shape: tuple
    :param out_shape: Size of the predicted embedding (the size of the
        embedded taxonomic level). It is handed unchanged to :func:`embed`,
        which uses it as the output size of its Dense layer.
    :type out_shape: int
    :param f_extractor: Feature extractor. It is called on the input tensor
        with the ``training`` keyword.
    :type f_extractor: tf.Layer
    :param reg_param: Regularization parameter for the weights in the Dense
        layer. Defaults to 0.001.
    :type reg_param: float
    :param input_name: Name of the input. Defaults to 'input_signal'.
    :type input_name: str
    :param model_name: Name of the architecture. Defaults to 'Embedder'.
    :type model_name: str
    :param training: Forwarded to ``f_extractor`` as its ``training``
        argument. Particularly relevant for layers such as Batch
        Normalization and Dropout. Defaults to *None*.
    :type training: bool, optional
    :return: Embedder
    """
    # Symbolic input of the functional model
    signal = Input(shape=in_shape, name=input_name)

    # Feature extraction followed by the (l2-normalized) embedding head
    extracted = f_extractor(signal, training=training)
    embedding = embed(extracted, out_shape, reg_param, apply_l2=True)

    return Model(inputs=signal, outputs=embedding, name=model_name)
#############################################################################################
def build_simple_classifier(in_shape, out_shape, f_extractor, reg_param=1e-3,
                            taxonomic_levels=['species'],
                            input_name="input_signal",
                            model_name="Simple_Classifier", training=None):
    """Build a model that classifies signals according to one taxonomic level.

    Only one level (e.g. genus or species) is actually predicted. The loss
    can nevertheless be extended to penalize wrong higher hierarchies: add
    them to ``taxonomic_levels`` and the prediction is re-exposed once per
    additional level, under that level's name.

    Architecture::

        f_extractor(Layer) + Dense + Softmax

    The outputs are the predicted class probabilities, followed by one
    identity copy of them for every entry of ``taxonomic_levels`` after the
    first.

    :param in_shape: Input shape, without the batch dimension.
    :type in_shape: tuple
    :param out_shape: Number of classes of the predicted taxonomic level.
        It is handed unchanged to :func:`predict_prob` as the Dense output
        size.
    :type out_shape: int
    :param f_extractor: Feature extractor. It is called on the input tensor
        with the ``training`` keyword.
    :type f_extractor: tf.Layer
    :param reg_param: Regularization parameter for the weights in the Dense
        layer. Defaults to 0.001.
    :type reg_param: float
    :param taxonomic_levels: Taxonomic levels to include in the loss
        function. The first entry is the predicted level and names the
        Softmax output; every further entry names an identity output.
        Must contain at least one entry. Defaults to ['species'].
    :type taxonomic_levels: list
    :param input_name: Name of the input. Defaults to 'input_signal'.
    :type input_name: str
    :param model_name: Name of the architecture. Defaults to
        'Simple_Classifier'.
    :type model_name: str
    :param training: Forwarded to ``f_extractor`` as its ``training``
        argument. Particularly relevant for layers such as Batch
        Normalization and Dropout. Defaults to *None*.
    :type training: bool, optional
    :return: Simple Classifier
    """
    # NOTE: the list default of ``taxonomic_levels`` is only read, never
    # mutated, so sharing it across calls is harmless.

    # Define input
    sig = Input(shape=in_shape, name=input_name)

    # Extract features
    features = f_extractor(sig, training=training)

    # Predict normalized probabilities for the first (predicted) level
    pred_prob = predict_prob(features, out_shape, reg_param,
                             taxonomic_levels[0],
                             add_softmax=True, as_block=False)
    outputs = [pred_prob]

    # Re-expose the same prediction under the name of every additional level
    # so that the loss can attach a penalty term to each of them. The
    # project's Identity layer is used (as in
    # build_simple_embedder_classifier) instead of a Lambda wrapping
    # tf.identity: Lambda layers are serialized as Python bytecode, which
    # makes saved models fragile to reload.
    for extra_level in taxonomic_levels[1:]:
        outputs.append(Identity(name=extra_level)(pred_prob))

    # Build model
    return Model(inputs=sig, outputs=outputs, name=model_name)
#############################################################################################
def build_simple_embedder_classifier(in_shape, out_shapes, f_extractor,
                                     reg_param=1e-3,
                                     taxonomic_levels=['species'],
                                     input_name="input_signal",
                                     model_name="Simple_Embedder_Classifier",
                                     training=None):
    """Assemble a model that embeds one taxonomic level and classifies one.

    The model learns the embedding of one taxonomic level and classifies
    signals according to a single taxonomic level (which does not have to
    coincide with the embedded one). The loss can be extended to penalize
    wrong higher hierarchies by listing them in ``taxonomic_levels``.

    Architecture (layers in brackets are branched out)::

        f_extractor(Layer) + Dense(+ L2) + DenseBlock + Softmax

    Outputs, in order: the predicted embedding (layer named 'embedding'),
    the class probabilities, then one identity copy of the probabilities
    per additional entry of ``taxonomic_levels``.

    :param in_shape: Input shape, without the batch dimension.
    :type in_shape: tuple
    :param out_shapes: Output sizes. ``out_shapes[0]`` is the size of the
        embedding, ``out_shapes[1]`` the size of the probability vector.
    :type out_shapes: tuple
    :param f_extractor: Feature extractor. It is called on the input tensor
        with the ``training`` keyword.
    :type f_extractor: tf.Layer
    :param reg_param: Regularization parameter for the weights in the Dense
        layers. Defaults to 0.001.
    :type reg_param: float
    :param taxonomic_levels: Taxonomic levels to include in the loss
        function. The first entry is the predicted level; every further
        entry names an identity output of the same prediction.
        Defaults to ['species'].
    :type taxonomic_levels: list
    :param input_name: Name of the input. Defaults to 'input_signal'.
    :type input_name: str
    :param model_name: Name of the architecture. Defaults to
        'Simple_Embedder_Classifier'.
    :type model_name: str
    :param training: Forwarded to ``f_extractor`` as its ``training``
        argument. Particularly relevant for layers such as Batch
        Normalization and Dropout. Defaults to *None*.
    :type training: bool, optional
    :return: Simple Embedder Classifier
    """
    emb_size, prob_size = out_shapes[0], out_shapes[1]
    predicted_level = taxonomic_levels[0]

    signal = Input(shape=in_shape, name=input_name)
    extracted = f_extractor(signal, training=training)

    # The Dense embedding is requested WITHOUT normalization so that the raw
    # vector can feed the classification branch; the l2-normalization is
    # attached here as its own branch and becomes the first model output.
    # (Had embed() normalized already, an Identity layer named 'embedding'
    # would be needed instead to open the second branch.)
    raw_embedding = embed(extracted, emb_size, reg_param, apply_l2=False)
    model_outputs = [L2_Norm(name='embedding')(raw_embedding)]

    # Classification branch: DenseBlock + Softmax on the raw embedding
    probabilities = predict_prob(raw_embedding, prob_size, reg_param,
                                 predicted_level,
                                 add_softmax=True, as_block=True)
    model_outputs.append(probabilities)

    # One extra named copy of the prediction per additional taxonomic level,
    # so that the loss can penalize each of them
    for extra_level in taxonomic_levels[1:]:
        model_outputs.append(Identity(name=extra_level)(probabilities))

    return Model(inputs=signal, outputs=model_outputs, name=model_name)
#############################################################################################
def build_hiera_classifier(in_shape, out_shapes, f_extractor, reg_param=1e-3,
                           taxonomic_levels=['genus', 'species'],
                           parallel=True, input_name="input_signal",
                           model_name="Hiera_Classifier", training=None):
    """Assemble a model that classifies signals on several taxonomic levels.

    Architectures (layers in brackets are branched out)::

        series:   f_extractor(Layer) + Dense(+ Softmax)
                  + DenseBlock(+ Softmax) ... + DenseBlock + Softmax
        parallel: f_extractor(Layer) (+ Dense + Softmax)
                  (+ Dense + Softmax) ...

    The model outputs one vector of class probabilities per taxonomic level,
    in the order of ``taxonomic_levels``.

    :param in_shape: Input shape, without the batch dimension.
    :type in_shape: tuple
    :param out_shapes: Number of classes of every taxonomic level, paired
        position by position with ``taxonomic_levels``.
    :type out_shapes: tuple
    :param f_extractor: Feature extractor. It is called on the input tensor
        with the ``training`` keyword.
    :type f_extractor: tf.Layer
    :param reg_param: Regularization parameter for the weights in the Dense
        layers. Defaults to 0.001.
    :type reg_param: float
    :param taxonomic_levels: Taxonomic levels to predict; each one names its
        own Softmax output. Defaults to ['genus', 'species'].
    :type taxonomic_levels: list
    :param parallel: Whether to attach parallel Dense layers for every
        prediction. Otherwise, they are connected one after another.
        Defaults to *True*.
    :type parallel: bool
    :param input_name: Name of the input. Defaults to 'input_signal'.
    :type input_name: str
    :param model_name: Name of the architecture. Defaults to
        'Hiera_Classifier'.
    :type model_name: str
    :param training: Forwarded to ``f_extractor`` as its ``training``
        argument. Particularly relevant for layers such as Batch
        Normalization and Dropout. Defaults to *None*.
    :type training: bool, optional
    :return: Hierarchical Classifier
    """
    signal = Input(shape=in_shape, name=input_name)
    extracted = f_extractor(signal, training=training)

    level_specs = zip(taxonomic_levels, out_shapes)

    if parallel:
        # Every level gets its own Dense + Softmax head on the shared features
        heads = [
            predict_prob(extracted, n_classes, reg_param, level,
                         add_softmax=True, as_block=False)
            for level, n_classes in level_specs
        ]
    else:
        # In series: the heads are chained. Only the first link is a plain
        # Dense layer; every later link is a DenseBlock.
        heads = []
        chained = extracted
        for position, (level, n_classes) in enumerate(level_specs):
            chained = predict_prob(chained, n_classes, reg_param, level,
                                   add_softmax=False,
                                   as_block=position > 0)
            # The softmax is branched out for the loss, while the
            # unnormalized values are what is fed forward to the next link.
            heads.append(Softmax(name=level)(chained))

    return Model(inputs=signal, outputs=heads, name=model_name)
#############################################################################################
def build_hiera_embedder_classifier(in_shape, out_shapes, f_extractor,
                                    reg_param=1e-3,
                                    taxonomic_levels=['genus', 'species'],
                                    parallel=True,
                                    input_name="input_signal",
                                    model_name="Hiera_Embedder_Classifier",
                                    training=None):
    """Assemble a model that embeds and classifies on several taxonomic levels.

    Architectures (layers in brackets are branched out)::

        series:   f_extractor(Layer) + Dense (+ L2)
                  + DenseBlock(+ Softmax) + ... + DenseBlock + Softmax
        parallel: f_extractor(Layer) + Dense (+ L2)
                  (+ DenseBlock + Softmax) ...

    Outputs, in order: the predicted embedding (layer named 'embedding'),
    then one vector of class probabilities per taxonomic level.

    :param in_shape: Input shape, without the batch dimension.
    :type in_shape: tuple
    :param out_shapes: Output sizes. ``out_shapes[0]`` is the size of the
        embedding; the remaining entries are paired position by position
        with ``taxonomic_levels``.
    :type out_shapes: tuple
    :param f_extractor: Feature extractor. It is called on the input tensor
        with the ``training`` keyword.
    :type f_extractor: tf.Layer
    :param reg_param: Regularization parameter for the weights in the Dense
        layers. Defaults to 0.001.
    :type reg_param: float
    :param taxonomic_levels: Taxonomic levels to predict; each one names its
        own Softmax output. Defaults to ['genus', 'species'].
    :type taxonomic_levels: list
    :param parallel: Whether to attach parallel heads for every prediction.
        Otherwise, they are connected one after another.
        Defaults to *True*.
    :type parallel: bool
    :param input_name: Name of the input. Defaults to 'input_signal'.
    :type input_name: str
    :param model_name: Name of the architecture. Defaults to
        'Hiera_Embedder_Classifier'.
    :type model_name: str
    :param training: Forwarded to ``f_extractor`` as its ``training``
        argument. Particularly relevant for layers such as Batch
        Normalization and Dropout. Defaults to *None*.
    :type training: bool, optional
    :return: Hierarchical Embedder Classifier
    """
    signal = Input(shape=in_shape, name=input_name)
    extracted = f_extractor(signal, training=training)

    # The Dense embedding is requested WITHOUT normalization so that the raw
    # vector can feed the classification heads; the l2-normalization is
    # attached here as its own branch. (Had embed() normalized already, an
    # Identity layer named 'embedding' would be needed instead to branch.)
    raw_embedding = embed(extracted, out_shapes[0], reg_param, apply_l2=False)
    normalized_embedding = L2_Norm(name='embedding')(raw_embedding)

    # The embedded vector is always the first output
    model_outputs = [normalized_embedding]

    # The first entry of out_shapes belonged to the embedding
    level_specs = zip(taxonomic_levels, out_shapes[1:])

    if parallel:
        # Every level gets its own DenseBlock + Softmax head on the embedding
        model_outputs.extend(
            predict_prob(raw_embedding, n_classes, reg_param, level,
                         add_softmax=True, as_block=True)
            for level, n_classes in level_specs
        )
    else:
        # In series: each DenseBlock consumes the previous block's output
        chained = raw_embedding
        for level, n_classes in level_specs:
            chained = predict_prob(chained, n_classes, reg_param, level,
                                   add_softmax=False, as_block=True)
            # The softmax is branched out for the loss, while the
            # unnormalized values are what is fed forward to the next block.
            model_outputs.append(Softmax(name=level)(chained))

    return Model(inputs=signal, outputs=model_outputs, name=model_name)
#############################################################################################
#############################################################################################
"""Auxiliary functions called inside the *build*-functions"""
def embed(x, out_shape, reg_param=1e-3, apply_l2=False):
    """Project features through a Dense layer and optionally l2-normalize them.

    Meant to be used as a module in models with embedding layers. The Dense
    layer is named 'dense_emb'; the optional normalization layer is named
    'embedding'.

    :param x: Input features.
    :type x: Tensor
    :param out_shape: Dense layer output shape.
    :type out_shape: int
    :param reg_param: Regularization parameter (l2) for the weights in the
        Dense layer. Defaults to 0.001.
    :type reg_param: float
    :param apply_l2: Whether to l2-normalize the features output by the
        Dense layer. Defaults to *False*.
    :type apply_l2: bool
    :return: Predicted embedding.
    """
    projection = Dense(
        out_shape,
        dtype='float32',
        name='dense_emb',
        kernel_regularizer=regularizers.l2(reg_param),
    )
    embedding = projection(x)

    # Without normalization the raw Dense output is the embedding
    if not apply_l2:
        return embedding

    return L2_Norm(name='embedding')(embedding)
#############################################################################################
def predict_prob(x, out_shape, reg_param=1e-3, taxonomic_level='species',
                 add_softmax=True, as_block=False):
    """Predict class scores of a signal for the given taxonomic level.

    The features pass through a head named ``'dense_' + taxonomic_level``
    (a plain Dense layer or a DenseBlock) and, if requested, through a
    Softmax layer named after the taxonomic level.

    :param x: Input features.
    :type x: Tensor
    :param out_shape: Dense layer output shape.
    :type out_shape: int
    :param reg_param: Regularization parameter for the weights in the Dense
        layer. Defaults to 0.001.
    :type reg_param: float
    :param taxonomic_level: Predicted taxonomic level. Defaults to 'species'.
    :type taxonomic_level: str
    :param add_softmax: Whether to normalize the probabilities with a
        Softmax layer. Defaults to *True*.
    :type add_softmax: bool
    :param as_block: Whether to pass **x** through a simple Dense layer or a
        Dense block. See wingbeats.modelling.layers. Defaults to *False*.
    :type as_block: bool
    :return: Predicted probability vector (the unnormalized head output
        when ``add_softmax`` is *False*).
    """
    head_name = 'dense_' + taxonomic_level

    # Pick the head: Dense block or simple regularized Dense layer
    if as_block:
        head = DenseBlock(out_shape, reg_param, name=head_name)
    else:
        head = Dense(
            out_shape,
            dtype='float32',
            name=head_name,
            kernel_regularizer=regularizers.l2(reg_param),
        )
    scores = head(x)

    if add_softmax:
        return Softmax(name=taxonomic_level)(scores)
    return scores