Posts in category python

Bayesian updating for a simple concrete creep model

Hello,

Here is my first try at Bayesian updating in OpenTURNS. This is open for discussion!

The model is a concrete creep model (called beton_burger_fp in Code_Aster) applied to a uniaxial creep test over 60 years. Experimental data are available for the first 10 years.

The six physical inputs are k_rs, k_rd, eta_rs, eta_rd, eta_id and kappa; a seventh calibrated parameter, log_sigma, is the log of the standard deviation of the Gaussian observation noise.
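For reference, the deferred strain computed by the scripts below reads

$$\varepsilon(t) = \frac{\sigma_0}{3}\left[\frac{1}{k_{rs}}\left(1-e^{-k_{rs}\,t/\eta_{rs}}\right)+\frac{2}{k_{rd}}\left(1-e^{-k_{rd}\,t/\eta_{rd}}\right)\right]+\frac{\kappa}{\sqrt{1+2\nu^{2}}}\,\ln\!\left(1+\frac{3\,\sigma_0\sqrt{1+2\nu^{2}}\;t}{\kappa\left(\eta_{is}(1-2\nu)+2\,\eta_{id}(1+\nu)\right)}\right),\qquad \eta_{is}=\frac{1+\nu}{1-2\nu}\,\eta_{id},$$

with $\nu = 0.2$ and $\sigma_0 = 12\,\mathrm{MPa}$.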

Outputs are:

  1. the a priori versus a posteriori input distributions
  2. the a priori and a posteriori mean response, together with the 95% confidence bounds (2.5% and 97.5% quantiles) of the response.

The main script is actu_bayes.py. fluage.data and StepB.py are used by actu_bayes.py.

fluage.data:

 0.              0.
2.46E-09       -1.71E-19
3.74E+03       2.20E-05
1.95E+04       1.15E-04
1.29E+05       3.54E-05
1.34E+05       6.47E-05
2.69E+05       1.40E-04
1.01E+06       1.99E-04
1.88E+06       2.35E-04
2.12E+06       2.43E-04
2.37E+06       2.54E-04
2.74E+06       2.60E-04
3.11E+06       2.78E-04
4.46E+06       2.99E-04
5.32E+06       3.07E-04
5.94E+06       3.18E-04
6.18E+06       3.37E-04
7.53E+06       3.44E-04
8.15E+06       3.50E-04
8.64E+06       3.56E-04
9.62E+06       3.65E-04
1.07E+07       3.83E-04
1.21E+07       4.03E-04
1.32E+07       4.26E-04
1.53E+07       4.23E-04
1.60E+07       4.38E-04
1.85E+07       4.40E-04
1.99E+07       4.50E-04
2.26E+07       4.78E-04
2.55E+07       4.85E-04
2.59E+07       5.07E-04
2.94E+07       5.07E-04
3.34E+07       5.27E-04
3.44E+07       5.39E-04
3.72E+07       5.57E-04
4.07E+07       5.71E-04
4.90E+07       6.08E-04
7.55E+07       6.92E-04
9.46E+07       7.60E-04
1.14E+08       8.25E-04
1.32E+08       8.58E-04
1.61E+08       9.87E-04
1.99E+08       1.12E-03
2.27E+08       1.21E-03
2.55E+08       1.31E-03
2.84E+08       1.41E-03
3.12E+08       1.51E-03

StepB.py:

# -*- coding: utf-8 -*-

from openturns import *

import numpy as np

dim_in  = 7        # number of input variables
dim_out = nb_data+nb_disc  # number of output variables (nb_data and nb_disc are defined in actu_bayes.py, which execfile's this script)

# Transfer function

class myfunction(OpenTURNSPythonFunction):
        def __init__(self):
                OpenTURNSPythonFunction.__init__(self, dim_in, dim_out)
        def _exec(self, X):     
                        
                v_KRS   = X[0];
                v_KRD   = X[1];
                v_ETARS = X[2];
                v_ETARD = X[3];
                v_ETAID = X[4];
                v_KAPPA = X[5];

                nu = 0.2; # elastic Poisson ratio

                T_FIN = 1.892E9; # 60 years

                TEMPS = np.zeros(dim_out)
  
                for i in range(0,len(temps)):
                  TEMPS[i] = np.array(temps[i])

                T_FIN_data = TEMPS[nb_data-1]

                SIGMA_0 = 12E6;

                ETA_IS = (1+nu)/(1-2*nu)*v_ETAID
                B = v_KAPPA/(np.sqrt(1.+2.*nu**2));
                C = SIGMA_0*np.sqrt(1.+2.*nu**2)/(v_KAPPA*(1.+nu)*v_ETAID);
                C2 = SIGMA_0*np.sqrt(1.+2.*nu**2)/(v_KAPPA*(1.-2.*nu)*ETA_IS);
                D = -v_KRD/v_ETARD;
                D2 = -v_KRS/v_ETARS;

                EPSA = [];
                EPS1 = [];

                for i in range(1,nb_disc+1):
                        TEMPS[nb_data+i-1] = T_FIN_data+i*(T_FIN-T_FIN_data)/nb_disc
                for i in range(dim_out):
                        EPSA.append(-SIGMA_0/3.*(1./v_KRS*(1.-np.exp(D2*TEMPS[i]))+2./v_KRD*(1.-np.exp(D*TEMPS[i])))-B*np.log(1.+(3.*SIGMA_0*np.sqrt(1.+2.*nu**2)*TEMPS[i])/(v_KAPPA*(ETA_IS*(1.-2.*nu)+2.*v_ETAID*(1.+nu)))));
                        EPS1.append(-EPSA[i]);


                return EPS1

model = NumericalMathFunction(myfunction())

actu_bayes.py:

# -*- coding: utf-8 -*-
from openturns import *
import numpy as np
import pylab as pl
from scipy.stats import norm
import time
import scipy.optimize as so
import scipy.stats as st
import os  # needed below to create the output directories

###################
# Civaux parameters
###################

# Elastic properties
nu = 0.2; # elastic Poisson ratio
sigma_0 = 12E6;

#########
# Times of the experimental data
#########

data       = open('fluage.data','r')

liste = data.readlines()
nb_data = len(liste)
data.close()

temps = NumericalSample(nb_data,1)

for j in range(len(liste)):
  ligne1 = liste[j].split()
  temps[j,0] = float(ligne1[0])

######
# Model definition
######

def buildModel(temps):

  # Express eta_0_is in terms of the other parameters
  eta_0_is = "eta_0_id * (" + str((nu + 1.0) / (1.0 - 2.0 * nu)) + ")"
  # Convert all the fixed numerical values into strings
  # Protect them with parenthesis to avoid troubles with negative values
  sigma_0_str = str(sigma_0)
  t_str = "(" + str(temps) + ")"
  factor = np.sqrt(1.0 + 2.0 * nu**2)
  term0 = "(" + str(factor) + ")"
  term1 = "(" + str(3.0 * sigma_0 * factor) + ")"
  term2 = "(" + str(1.0 - 2.0 * nu) + ")"
  term3 = "(" + str(1.0 + nu) + ")"
  # The expression, at last...
  return NumericalMathFunction(["k_rs", "k_rd", "eta_rs", "eta_rd", "eta_0_id", "kappa", "log_sigma"], [sigma_0_str + " / 3 * (1.0 / k_rs * (1.0 - exp(-k_rs / eta_rs * " + t_str + ")) + 2 / k_rd * (1.0 - exp(-k_rd / eta_rd * " + t_str + "))) + kappa / " + term0 + " * log(1.0 + " + term1 + " / (kappa * (" + eta_0_is + " * " + term2 + " + 2.0 * eta_0_id * " + term3 + ")) * " + t_str + ")", "exp(log_sigma)"])

# Numpy version of the model

T = np.array(temps).ravel()

def modeleDeformation(theta):
    k_rs,k_rd,eta_rs,eta_rd,eta_id,kappa, log_sigma = theta

    eta_is = (1+nu)/(1-2*nu)*eta_id
    B   = kappa/(np.sqrt(1+2*nu*nu))
    C   = sigma_0*np.sqrt(1+2*nu*nu)/(kappa*(1+nu)*eta_id)
    C2  = sigma_0*np.sqrt(1+2*nu*nu)/(kappa*(1-2*nu)*eta_is)
    D   = -k_rd/eta_rd
    D2  = -k_rs/eta_rs

    return np.hstack((sigma_0/3*(1/k_rs*(1-np.exp(D2*T))+2/k_rd*(1-np.exp(D*T))) \
            + B*np.log(1+(3*sigma_0*np.sqrt(1+2*nu**2)*T)/(kappa*(eta_is*(1-2*nu)+2*eta_id*(1+nu)))),\
            np.exp(log_sigma)))

###
# Strain measurements
###

yobs= [ 0., -1.71E-19, 2.20E-05, 1.15E-04, 3.54E-05, 6.47E-05, 1.40E-04, 1.99E-04, \
 2.35E-04, 2.43E-04, 2.54E-04, 2.60E-04, 2.78E-04, 2.99E-04, 3.07E-04, 3.18E-04, \
 3.37E-04, 3.44E-04, 3.50E-04, 3.56E-04, 3.65E-04, 3.83E-04, 4.03E-04, 4.26E-04, \
 4.23E-04, 4.38E-04, 4.40E-04, 4.50E-04, 4.78E-04, 4.85E-04, 5.07E-04, 5.07E-04, \
 5.27E-04, 5.39E-04, 5.57E-04, 5.71E-04, 6.08E-04, 6.92E-04, 7.60E-04, 8.25E-04, \
 8.58E-04, 9.87E-04, 1.12E-03, 1.21E-03, 1.31E-03, 1.41E-03, 1.51E-03]

obsSize = len(yobs)

yobs = np.atleast_2d(yobs).T
yobs = NumericalSample(yobs)

modelCollection=NumericalMathFunctionCollection()

for iaux in range(nb_data):
  modelCollection.add(buildModel(temps[iaux,0]))

model = NumericalMathFunction(modelCollection)

conditional = Normal()

# reference
Moyenne = [2.26E10, 3.46E10, 1.97E17, 1.E17, 2.51E18, 2.51E-3]

# gives more or less weight to the model: 1/sigma^2
# very important variable
StdDev = NumericalPoint([elt for elt in NumericalPoint(np.hstack(([0.15*elt for elt in Moyenne],-np.log(.00025))))])
# Note: the diagonal of this matrix stores the variances, i.e. it is used
# as the covariance matrix of the prior
CorrMatrix = CorrelationMatrix(7)

for iaux in range(7):
  CorrMatrix[iaux,iaux] = StdDev[iaux]*StdDev[iaux]

Prior = Normal(NumericalPoint(np.hstack((Moyenne,.00025))),CorrMatrix)

Theta0 = Prior.getMean()

print "Moyenne prior " , NumericalPoint(np.hstack((Moyenne,.00025)))
print "Prior.getMean : " , Prior.getMean()


proposal = DistributionCollection()
for iaux in range(len(StdDev)):
    width = 0.15*StdDev[iaux]
    proposal.add(Uniform(-width,width))

RWMHsampler = RandomWalkMetropolisHastings(Prior,conditional,model,yobs,Theta0,proposal)

strategy = CalibrationStrategyCollection()
aStrategy = CalibrationStrategy(Interval(0.,1.))
for iaux in range(7):
  strategy.add(aStrategy)

RWMHsampler.setCalibrationStrategyPerComponent(strategy)

RWMHsampler.setVerbose(True)
RWMHsampler.setThinning(1)
RWMHsampler.setBurnIn(100)

sampleSize = 200

t1 = time.time()
sample = RWMHsampler.getSample(sampleSize)
t2 = time.time()
print 'acceptance rate = ',
print RWMHsampler.getAcceptanceRate()


myRandomVector = PosteriorRandomVector(RWMHsampler)
# INCREASE TO 100000 to get very flat distributions
n = 5000

posterior_sample = myRandomVector.getSample(n)
np_posterior_sample = np.array(posterior_sample)
#np_posterior_sample = np.array(sample)

post_mean = np_posterior_sample.mean(axis=0)

#Posterior plot
nsample = 1000

pl.clf()
pl.figure(999)
for i in xrange(nsample):
    bb = pl.plot(T/3600./24./365.,modeleDeformation(np_posterior_sample[np.random.randint(n)])[:-1], 'r:')

cc = pl.plot(T/3600./24./365.,modeleDeformation(post_mean)[:-1], 'k', lw=2)
aa = pl.scatter(T/3600./24./365.,yobs, s=20, c='g',zorder=10)

pl.legend((aa, bb, cc), ("obs", "Loi a posteriori", "Moyenne a posteriori"), loc="upper left")
pl.xlabel("Temps (annees)")
pl.ylabel("Deformation differee")
pl.savefig("AjustementMoyenneAPosteriori.pdf")
pl.xlim(0,3.)
pl.ylim(0,0.001)
pl.savefig("AjustementMoyenneAPosterioriZoom.pdf")


pl.figure()
for i in range(7):
    #pl.figure(50+i)
    pl.subplot(7,1,i+1)
    pl.plot(range(n), np_posterior_sample[:,i])

filename = "cv_posterior_var.pdf"
pl.savefig(filename)

pl.clf()

t3 = time.time()

kernel = KernelSmoothing()

# Prior input PDFs (for comparison)
for i in range(7):
  priorPDF = Prior.getMarginal(i).drawPDF()
  priorPDFdrawable = priorPDF.getDrawable(0)
  posteriordrawable = kernel.build(posterior_sample.getMarginal(i)).drawPDF().getDrawable(0) 
  posteriordrawable.setLegendName('posterior')
  posteriordrawable.setColor('blue')
  priorPDF.add(posteriordrawable)
  repe = "prior_vs_posterior_var_entree_"+str(i+1)+"_sampleSize"+str(sampleSize)+"_n_"+str(n)
  if not(repe in os.listdir('.')) :
    os.mkdir(repe)
  os.chdir(repe)
  filename = "prior_vs_posterior_var_entree_"+str(i+1)+"_sampleSize"+str(sampleSize)+"_n_"+str(n)
  priorPDF.draw(filename)
  os.chdir('../.')

np.savetxt('echantillon_'+str(n),np_posterior_sample)
print 'calcul de la loi a posteriori : ',t2-t1
print 'calcul d un echantillon a posteriori : ',t3-t2

print "LogVraisemblance(Theta0) = ", RWMHsampler.computeLogLikelihood(Theta0)
print "LogVraisemblance(10*Theta0) = ", RWMHsampler.computeLogLikelihood(10*Theta0)

pl.clf()
pl.figure(7)
####
# Plot the experimental data
####
data       = open('fluage.data','r')
temps_data = []
eps1_data  = []

liste = []
liste = data.readlines()
data.close()

for j in range(1,len(liste)):
  ligne1 = liste[j].split()
  temps_data.append(float(ligne1[0]))
  eps1_data.append(float(ligne1[1]))
for ind in range(len(temps_data)):
  temps_data[ind] = temps_data[ind]/(3600.*24.*365.)

pl.plot(temps_data, eps1_data,'bo' ,label="Donnees essais exp.")
pl.legend(bbox_to_anchor=(.05, 1), loc=2, borderaxespad=0.)
pl.xlabel("Temps (annees)")
pl.ylabel("Deformation differee")
pl.title("Comparaison actu bayes et essais")

###
# Plot the posterior and the 95% confidence interval
####
# time discretization beyond the instants for which experimental data
# are available
nb_disc = 600

execfile('StepB.py')

sizeX = n

SampleMC     = model(posterior_sample)
priorSample  = Prior.getSample(sizeX)
sample_model_prior = model(priorSample)

meanMC       = SampleMC.computeMean()  
covarianceMC = SampleMC.computeCovariance()
meanMCprior       = sample_model_prior.computeMean()  
covarianceMCprior = sample_model_prior.computeCovariance()

# 95% confidence interval of the meanMC estimator
borneinf_meanMC=[]
bornesup_meanMC=[]

alpha = 0.95

lower_bnds=np.array(SampleMC.computeQuantilePerComponent((1-alpha)/2.) )
upper_bnds=np.array(SampleMC.computeQuantilePerComponent((1+alpha)/2.) )
lower_bnds_pr=np.array(sample_model_prior.computeQuantilePerComponent((1-alpha)/2.) )
upper_bnds_pr=np.array(sample_model_prior.computeQuantilePerComponent((1+alpha)/2.) )

for i in range(dim_out):
  borneinf_meanMC.append(meanMC[i]-1.96*np.sqrt(covarianceMC[i, i])/np.sqrt(sizeX))
  bornesup_meanMC.append(meanMC[i]+1.96*np.sqrt(covarianceMC[i, i])/np.sqrt(sizeX))

########
# Convert to years
########

# Final time: 60 years
T_FIN = 1.892E9;
TEMPS = np.zeros(dim_out)
for i in range(0,len(temps)):
    TEMPS[i] = np.array(temps[i])
T_FIN_data = TEMPS[nb_data-1]
for i in range(1,nb_disc+1):
    TEMPS[nb_data+i-1] = T_FIN_data+i*(T_FIN-T_FIN_data)/nb_disc

for ind in range(len(TEMPS)) :
  TEMPS[ind] = TEMPS[ind]/(3600.*24.*365.)

pl.clf()
pl.figure(100)
pl.xlabel("Temps (annees)")
pl.ylabel("Deformation differee")
pl.title("Comparaison actu bayes et essais")
pl.plot(temps_data, eps1_data,'bo' ,label="Donnees essais exp.")
pl.plot(TEMPS, meanMC,'g', label="Moyenne MC posterior")
pl.plot(TEMPS, meanMCprior,'k', label="Moyenne MC prior")
pl.plot(TEMPS,lower_bnds,'b',label="quantile 2.5 posterior")
pl.plot(TEMPS,upper_bnds,'b',label="quantile 97.5 posterior")
pl.plot(TEMPS,lower_bnds_pr,'r',label="quantile 2.5 prior")
pl.plot(TEMPS,upper_bnds_pr,'r',label="quantile 97.5 prior")
filename = "estimation_cv_MC_taille_"+str(sizeX)
pl.legend(loc='best')
pl.savefig(filename)
pl.xlim(0,3.)
pl.ylim(0,0.001)
filename="estimation_cv_MC_taille_restreint_"+str(sizeX)
pl.savefig(filename)

for ind in range(len(TEMPS)) :
  TEMPS[ind] = TEMPS[ind]*(3600.*24.*365.)

print 'moyenne de l echantillon posteriori = ',posterior_sample.computeMean()
print 'ecart type de l echantillon posteriori = ',posterior_sample.computeStandardDeviationPerComponent()

for i in range(6):
  print 'coefficient de variation variable '+str(i+1)+' : ',posterior_sample.computeStandardDeviationPerComponent()[i]/posterior_sample.computeMean()[i]

python-randomfields: a Python module for identifying non-stationary non-Gaussian random fields

Hi all,

On the occasion of the Material Ageing Institute Scientific Network (MAI-SN) partnership (initiated by EdF, TEPCO and EPRI), Phimeca Engineering, EdF R&D and Institut Navier (ENPC) implemented a prototype Python module for the simulation and identification of non-stationary, non-Gaussian random fields based on the Karhunen-Loève expansion representation. Provided suitable measurements are available, this could help model the randomness in the material properties of ageing structures.

It is our pleasure to announce that this module is available for use and contribution. A dedicated GitHub repository has been created for this purpose:

https://github.com/dubourg/python-randomfields

Please feel free to fork it or simply clone/download the code.

The main page of the repository displays a README file, which is highly recommended reading. It also features a non-exhaustive todo list. Contributions are welcome!

The module is essentially based on Numpy and the ever-improving Python bindings to OpenTURNS (thanks J!). It implements a single object named KarhunenLoeveExpansion, which is self-documented using Python docstrings. The object discretizes the random field at instantiation by solving the Fredholm integral equation using a Legendre-Galerkin scheme. Integrals are numerically approximated using Gauss-Legendre quadrature (from OpenTURNS). Other Galerkin schemes could possibly be used (e.g. wavelet-based Galerkin schemes), but the Legendre scheme is the only one implemented so far. The instantiated object is callable and takes the field index vector (e.g. space, time, potatoes, ...) and the seed as input. See the references in the README file if this already sounds like gibberish to you!

The repository contains 4 scripts in addition to the module, illustrating how the KLE object can be used for simulation and identification. Regarding identification, once the sample paths have been transformed into a sample of coefficient vectors, we are back to basic statistics! Hence, OpenTURNS can be used for identifying the joint distribution of this coefficient vector. Karhunen-Loève theory states the coefficients are zero-mean, unit-variance and uncorrelated (by construction), but their joint distribution is generally unknown if the field is not assumed to be Gaussian. Identifying such large vectors is still a technically (if not scientifically) open question though, so...
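To make that last step concrete, here is a minimal sketch of the "basic statistics" part, assuming the projection of the sample paths has already produced a NumericalSample of KL coefficients named xi_sample (a hypothetical variable: one row per path, one column per retained mode):

from openturns import *

# xi_sample: NumericalSample of identified KL coefficients (assumed to exist)
dim = xi_sample.getDimension()
# Parametric attempt: independent Gaussian coefficients (the Gaussian-field hypothesis)
marginals = DistributionCollection()
for i in range(dim):
    marginals.add(NormalFactory().build(xi_sample.getMarginal(i)))
gaussianModel = ComposedDistribution(marginals, Copula(IndependentCopula(dim)))
# Non-parametric alternative when the field is not assumed to be Gaussian
kernelModel = KernelSmoothing().build(xi_sample)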

Enjoy and give us feedback!

Marine Marcilhac & Vincent Dubourg

PS: Thanks to Marc Berveiller and Géraud Blatman at EdF for the very first feedback, which helped in reaching this first public version.

New distributed python wrapper

Hello,

A new Python wrapper is available on the souchaud branch: svn co https://svn.openturns.org/branches/souchaud (revision 2691 is OK).

Characteristics of the new wrapper:

  • ease of use: it uses the Python language and provides a module (coupling_tools) that helps manipulate templates
  • transparent distribution of the computations over the cores of the local machine
  • transparent distribution of the computations over the cores of several hosts (needs an SSH server on each remote host; not yet implemented on Windows)
  • transparent distribution of the computations on a remote cluster (not yet implemented)

An example is available in the branch: wrappers/WrapperTemplates/wrapper_python_distributed.
If you want to distribute the computations over several hosts, the python-paramiko module must be installed on the machine that launches the OpenTURNS script.

For further info on the DistributedPythonFunction and CouplingTools interfaces:

This wrapper should be tested in order to give us enough feedback. It is still under development and some parts of its code have to be improved/cleaned up...

This wrapper's overhead is around 0.05 s per point (constant from 1 to 8 cores), so external codes that last less than 0.1 s will not be sped up as much as theoretically expected (e.g. with 1000 points to compute and each point lasting 0.1 s, the total computing time using one core is 1000*(0.1+0.05) = 150 s, whereas it would be 100 s without overhead). Tested on an i7 2.3 GHz * 8 cores without HT + SSD.
The current C wrapper (e.g. in wrappers/WrapperTemplates/wrapper_calling_shell_command) has an overhead of around 0.01 s per point (dependent on the number of cores used: 1 core -> overhead = 0.007 s, 8 cores -> overhead = 0.02 s).

If you've got a huge number of points that each last less than 0.01 s, the best option is to implement the _exec_sample function directly (using the C wrapper or an OpenTURNSPythonFunction), as sketched below. If needed, the new wrapper could in the future permit implementing _exec_sample in order to reduce the overhead.
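For reference, a minimal sketch of an OpenTURNSPythonFunction overriding _exec_sample with a toy function (the vectorized method evaluates a whole sample at once and therefore avoids paying the per-point overhead):

from openturns import *
import numpy as np

class MyVectorizedFunction(OpenTURNSPythonFunction):
    def __init__(self):
        OpenTURNSPythonFunction.__init__(self, 2, 1)
    def _exec(self, X):
        # single-point evaluation
        return [X[0] + X[1]]
    def _exec_sample(self, sampleX):
        # vectorized evaluation of a whole sample at once
        x = np.array(sampleX)
        return np.atleast_2d(x[:, 0] + x[:, 1]).T

f = NumericalMathFunction(MyVectorizedFunction())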

If you've got some ideas or remarks, do not hesitate to comment, but first check that they are not already in the todo file (wrappers/WrapperTemplates/wrapper_python_distributed/TODO.txt).

Regards.

Mathieu Souchaud

OT function decorator

Hi,

Here is an easier way to pass Python functions to OpenTURNS.

First, define this decorator for Python functions once (it could be proposed in a future version):

def FunctionDecorator(n, p):
  class otfunc(OpenTURNSPythonFunction):
      
    def __init__(self, func):
      OpenTURNSPythonFunction.__init__(self, n, p) 
      self.__class__.__name__ = func.__name__
      self._exec = func 
        
  return otfunc

Then apply it to your function by adding this one line:

@FunctionDecorator(2, 1)
def SIMPLEFUNC(X):
    Y = [ X[0] + X[1] ]    
    return Y

Then use it through OpenTURNS as usual:

myFunc = NumericalMathFunction( SIMPLEFUNC )
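A quick sanity check of the decorated function (assuming the definitions above):

x = NumericalPoint(2)
x[0] = 1.0
x[1] = 2.0
print myFunc(x)  # a point of dimension 1 containing 3.0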

This is open for discussion ...

J.

openturns-agrum: a new module for OpenTURNS that allows creating a distribution from a Bayesian network using the aGrUM library

Hi, in collaboration with Pierre-Henri WUILLEMIN (LIP6, aGrUM team member) and Anne DUTFOY (EDF R&D, OpenTURNS team member), I have written a new OpenTURNS module that allows one to create 1D OpenTURNS distributions as conditional marginal distributions of a Bayesian network built using aGrUM (http://agrum.lip6.fr/doku.php). It is very useful when one has conditional information about an uncertain quantity but no statistical data directly related to this quantity.

This module is compatible with OpenTURNS 0.13.2 and aGrUM revision 382.

Here is an example of a Python script using this module:

from openturns import *
from pyAgrum import *
from otagrum import *

  1. Create an empty BN

myBN = BayesNet()

  2. Create the light variable

light = LabelizedVar("Light", "Light", 0)
light.addLabel("Dim")
light.addLabel("Bright")
lightSize = len(light)

  3. Create the moisture variable

moisture = LabelizedVar("Moisture", "Moisture", 0)
moisture.addLabel("Dry")
moisture.addLabel("Wet")
moistureSize = len(moisture)

  4. Create the height variable

height = DiscretizedVar("Height", "Height")
data = NumericalPoint(range(10, 100, 10))
heightWhenDimAndDry = Uniform(0.0, 40.0)
heightWhenDimAndWet = Uniform(10.0, 60.0)
heightWhenBrightAndDry = Uniform(0.0, 20.0)
heightWhenBrightAndWet = Uniform(40.0, 100.0)
data = BayesNetAgrum.AdaptGrid(DistributionCollection(heightWhenDimAndDry, heightWhenDimA...), data)
heightSize = data.getSize() - 1
for i in list(data):
    height.addTick(i)

  5. Add the variables to the net

indexLight = myBN.add(light)
indexMoisture = myBN.add(moisture)
indexHeight = myBN.add(height)

  6. Create the arcs

myBN.insertArc(indexLight, indexMoisture)
myBN.insertArc(indexLight, indexHeight)
myBN.insertArc(indexMoisture, indexHeight)

  7. Create the conditional probability tables

myBN.cpt(indexLight)[:] = [0.5, 0.5]
myBN.cpt(indexMoisture)[{'Light': 'Dim'}] = [0.2, 0.8]
myBN.cpt(indexMoisture)[{'Light': 'Bright'}] = [0.6, 0.4]
myBN.cpt(indexHeight)[{'Light': 'Dim', 'Moisture': 'Dry'}] = BayesNetAgrum().Discretize(heightWhenDimAndDry, data)
myBN.cpt(indexHeight)[{'Light': 'Bright', 'Moisture': 'Dry'}] = BayesNetAgrum().Discretize(heightWhenBrightAndDry, data)
myBN.cpt(indexHeight)[{'Light': 'Dim', 'Moisture': 'Wet'}] = BayesNetAgrum().Discretize(heightWhenDimAndWet, data)
myBN.cpt(indexHeight)[{'Light': 'Bright', 'Moisture': 'Wet'}] = BayesNetAgrum().Discretize(heightWhenBrightAndWet, data)

  8. Create a BayesNetAgrum object

otbn = BayesNetAgrum(myBN)

  9. Export to a BIF file

otbn.exportToBIFFile("test.bif")

  10. Perform inference

for evidenceValue in "Bright", "Dim":

   # First, set evidence
   otbn.setEvidence("Light", evidenceValue)
   # Second, get a marginal distribution of interest
   heightDistribution = otbn.getMarginal("Height")
   heightDistribution.setName("Height_Light_" + evidenceValue)
   # Use the distribution the way you want
   print "heightDistribution=", heightDistribution
   heightDistribution.drawPDF(-10.0, 110.0, 512).draw("PDF_" + heightDistribution.getName())
   heightDistribution.drawCDF(-10.0, 110.0, 512).draw("CDF_" + heightDistribution.getName())

Building a metamodel with Polynomial Chaos Expansion using a database

Hello all,

Recently, a colleague of mine asked me whether it was possible to build a Polynomial Chaos Expansion metamodel in OpenTURNS with only a database of inputs versus outputs. After scratching my head a little, I reached Regis and, after a short chat with him, I obtained the following script. I have not tested it with this exact decoration (names of the files, etc.), so maybe it will not work straightforwardly, but the core of the problem is treated here.

Enjoy!

# -*- coding: iso-8859-15 -*-
from openturns import *
from math import *
 
#This script builds a metamodel by Polynomial Chaos Expansion using a database as input.
#Y = f(X)
#For the moment, it is mandatory to go through an intermediate variable Z
#The hypothesis is that each variable varies in an interval [a,b]
#Z = phi(X), such that each Zj is Uniform(0.,1.). phi is affine
#Y = g(Z)
#We do the PCE on g and finally f = metamodel o phi
 
#The input file is InputFile.txt
 
 
 
#Euclidean distance between a point X and a point Y
def norm2(X,Y):
  norm = 0.
  for i in range(Y.getDimension()):
    norm+=(X[i]-Y[i])**2
  return sqrt(norm)
 
 
#this function takes an input in the database Z and returns the associated output
class statModel(OpenTURNSPythonFunction):
    def __init__(self):
        OpenTURNSPythonFunction.__init__(self, globals()["dim"], 1)
    def f(self,X):
        # look for the database point matching X and return the corresponding output
        # (outVar is the training output sample read from OutputFile.txt below)
        for i in range(globals()["siz"]):
            if norm2(X,globals()["Z"][i])<1.e-6:
                v=i
                break
        try:
            Y = globals()["outVar"][v][0]
        except:
            Y = 0.
        return [Y]
 
 
 
#this function is the affine function that transforms X into Z
class affineTransform(OpenTURNSPythonFunction):
    def __init__(self):
        OpenTURNSPythonFunction.__init__(self, globals()["dim"], globals()["dim"])
        #change input dimension
    def f(self,X):
        interval = globals()["interv"]
        Z = []
        for j in range(len(X)):
            Z.append((X[j] - interval[j][0])/(interval[j][1]-interval[j][0]))
        return Z
 
 
 
 
# Get training input from InputFile.txt
inVar = NumericalSample.ImportFromCSVFile("InputFile.txt")
inVar.setName("Input Variable")
 
 
#get the dimension and size of the sample
dim = inVar.getDimension()
siz = inVar.getSize()
 
# Define the intervals where the input variables are defined
#list of min values for each variable, to be specified by user
a1 = 0.
a2 = 0.
a3 = 0.
a4 = 0.
Min = [a1,a2,a3,a4]
#list of max values for each variable, to be specified by user
b1 = 1.
b2 = 2.
b3 = 3.
b4 = 4.
Max = [b1,b2,b3,b4]
 
interv = []
for i in range(len(Min)):
  interv.append([Min[i],Max[i]])
 
# Create Z the transformed input vector
Z = NumericalSample(siz,dim)
for i in range(siz):
  for j in range(dim):
    Z[i,j] = (inVar[i][j] - interv[j][0])/(interv[j][1]-interv[j][0])
 
# Get training output from OutputFile.txt
outVar = NumericalSample.ImportFromCSVFile("OutputFile.txt")
outVar.setName("Output Variable")
 
# Create Polynomial Chaos Expansion of outVar = g(Z)
polyColl = PolynomialFamilyCollection(dim)
 
legendreFamily = LegendreFactory()
for i in range(dim):
  polyColl[i] = OrthogonalUniVariatePolynomialFamily(legendreFamily)
 
multivariateBasis = OrthogonalProductPolynomialFactory(polyColl,EnumerateFunction(dim))
maximumConsideredTerms = 500
mostSignificant = 10
significanceFactor = 1.e-6
truncatureBasisStrategy = CleaningStrategy(OrthogonalBasis(multivariateBasis),maximumConsideredTerms, mostSignificant, significanceFactor, True)
evaluationCoeffStrategy = LeastSquaresStrategy(FixedExperiment(Z))
model = NumericalMathFunction(statModel())
# distribution of Z: each component is Uniform(0,1) with an independent copula
distColl = DistributionCollection(dim)
for i in range(dim):
  distColl[i] = Distribution(Uniform(0.,1.))
dist = ComposedDistribution(distColl, Copula(IndependentCopula(dim)))
polynomialChaosAlgorithm = FunctionalChaosAlgorithm(model, Distribution(dist), AdaptiveStrategy(truncatureBasisStrategy), ProjectionStrategy(evaluationCoeffStrategy))
 
polynomialChaosAlgorithm.run()
polynomialChaosResult = polynomialChaosAlgorithm.getResult()
 
coefficients = polynomialChaosResult.getCoefficients()
metaModel = polynomialChaosResult.getMetaModel()
 
# Get Response Surface : composition between Y = g(Z) and Z = aff(X) => our model is (g o aff)
proj = NumericalMathFunction(affineTransform())
modelfull = NumericalMathFunction(metaModel,proj)
 
# Get estimation input from InputCheck.txt
inpCheck = NumericalSample.ImportFromCSVFile("InputCheck.txt")
inpCheck.setName("Input variable for checking")
 
 
# Get estimation output
outCheck = modelfull(inpCheck)
outCheck.exportToCSVFile("OutputCheck.csv"," ")
 
fOutputCheck = open("OutputCheck.txt", 'w')
 
for i in range(outCheck.getSize()):
    fOutputCheck.write("%f\n" % (outCheck[i])[0])
 
fOutputCheck.close()

ot-mixmod: a new OT-module to create a Mixture Factory composed of Normal Distributions

Hi, I have written a small OpenTURNS module to create a Mixture factory composed of Normal distributions from a NumericalSample. The sources of the module are available in the openturns-module repository. However, you can download version 0.1.0 here.

This module uses MixMod with the default parameters. It requires OpenTURNS version 0.13.2 and the MixMod executable.

Here is an example of a Python script using this module:

from openturns import *
from otmixmod import *

draw = True
dim = 2
size = 2000
coll = DistributionCollection(0)
R = CorrelationMatrix(dim)
for i in range(dim-1):
    R[i,i+1] = 0.1
coll.add(Distribution(Normal(NumericalPoint(dim, -2.0), NumericalPoint(dim, 1.2), R)))
for i in range(dim-1):
    R[i,i+1] = -0.2
coll.add(Distribution(Normal(NumericalPoint(dim, 2.0), NumericalPoint(dim, 0.8), R)))
mean = NumericalPoint(dim, 5.0)
mean[0] = -5.0
coll.add(Distribution(Normal(mean, NumericalPoint(dim, 1.4), CorrelationMatrix(dim))))
mixture = Mixture(coll)
sample = mixture.getNumericalSample(size)
factory = MixtureFactory(3)
estimatedDistribution = factory.buildImplementation(sample)
print "Estimated distribution=", estimatedDistribution
if draw:
    g = estimatedDistribution.drawPDF()
    c = Cloud(sample)
    c.setColor("red")
    c.setPointStyle("bullet")
    ctmp = g.getDrawable(0)
    g.setDrawable(Drawable(c), 0)
    g.addDrawable(ctmp)
    g.draw("testMixMod")

Automatic quantification of a marginal based on a 1D sample

Hello all,

I propose the following Python script. It allows you to simply quantify a 1D variable from a sample. First, you create the family of parametric distributions against which you want to test your sample. If the best candidate does not fulfil the quality criterion, you fall back on a non-parametric quantification by kernel smoothing. The outputs of the function are the distribution and the result of the test.

# -*- coding: iso-8859-15 -*-
from openturns import *
 
def quantification(sample1D):
  # Creation of a factory collection
  factoryCollection = FactoryCollection()
  factoryCollection.add(DistributionFactory(UniformFactory()))
  factoryCollection.add(DistributionFactory(NormalFactory()))
  factoryCollection.add(DistributionFactory(BetaFactory()))
  #factoryCollection.add(DistributionFactory(WeibullFactory()))
  #factoryCollection.add(DistributionFactory(LogNormalFactory()))
  #factoryCollection.add(DistributionFactory(BetaFactory()))
  #factoryCollection.add(DistributionFactory(GammaFactory()))
  #factoryCollection.add(DistributionFactory(ExponentialFactory()))
  #factoryCollection.add(DistributionFactory(GumbelFactory()))
  #factoryCollection.add(DistributionFactory(WeibullFactory()))
  #factoryCollection.add(DistributionFactory(TruncatedNormalFactory()))
  #factoryCollection.add(DistributionFactory(StudentFactory()))
 
  try:
    BestDistributionMarg = Distribution(FittingTest().BestModelKolmogorov(sample1D, factoryCollection))
    print BestDistributionMarg
  except:
    print "impossible to do the parametric quantification"
## Study of the result by comparing the empirical distribution of the sample with the fitted parametric distribution
## if the test is ok, we keep the parametric distribution; if not, go to the non-parametric quantification
  try:
    TestMarg = FittingTest_Kolmogorov(sample1D,BestDistributionMarg).getBinaryQualityMeasure()
    print "Parametric quantification"
  except:
    TestMarg = False
## test not ok
  if TestMarg == False :
    kernel=KernelSmoothing()
    BestDistributionMarg = Distribution(kernel.buildImplementation(sample1D))
    print "Quantification by kernel smoothing"
  ## optional visual tests
  QQPlot = VisualTest().DrawQQplot(sample1D,BestDistributionMarg,1000)
  QQPlot.draw("QQPlotMargin", 640, 480)
  QQPlot.getBitmap()
 
  return BestDistributionMarg, TestMarg
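
A minimal usage sketch (with a toy Normal sample; the factory collection above should of course contain candidates suited to your data):

sample = Normal(0., 1.).getNumericalSample(500)
bestDist, testOK = quantification(sample)
print bestDist
print testOK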

Convergence of the statistical moments of the Ishigami function with respect to the design of experiments size, using polynomial chaos

Tested with OpenTURNS version 0.12.3

# Load the libraries
from openturns import *
from math import *
from openturns_viewer import ViewImage
 
# Initialize the seed of the random generator for the Monte Carlo simulations
RandomGenerator().SetSeed(0)
 
# Definition of the factorial function
def factorielle(x):
	if x==0:
		return 1
	else:
		return x*factorielle(x-1)
 
# Constants of the Ishigami function
A = 7.0
B = 0.1 
 
#Physical model: Ishigami function
class myfunction(OpenTURNSPythonFunction):
	def __init__(self):
		OpenTURNSPythonFunction.__init__(self, 3, 1)
	def f(self, X):
		H = sin(X[0]) + A*(sin(X[1])**2) + B*(X[2]**4)*sin(X[0])
		Y=[H]
		return Y
 
function=myfunction()
g = NumericalMathFunction(function)
 
# Number of input random variables
dim = 3
 
#Probabilistic model
mean=NumericalPoint(3)
mean[0]=0.0
mean[1]=0.0
mean[2]=0.0
 
L1=Uniform(-pi,pi)
L2=Uniform(-pi,pi)
L3=Uniform(-pi,pi)
 
Collection=DistributionCollection(dim)
Collection[0]=Distribution(L1)
Collection[1]=Distribution(L2)
Collection[2]=Distribution(L3)
 
Copule=IndependentCopula(dim)
Modelproba=ComposedDistribution(Collection,Copula(Copule))
 
# Definition of the univariate polynomials of the polynomial chaos
legendre = OrthogonalUniVariatePolynomialFamily(LegendreFactory())
polyColl = PolynomialFamilyCollection(dim)
polyColl[0]=legendre
polyColl[1]=legendre
polyColl[2]=legendre
 
# Display one polynomial of the basis
enumFunc = EnumerateFunction(dim)
MultiIndice_1 = enumFunc(0)
print "Polynome 1 de la base : ", MultiIndice_1
 
productPoly = OrthogonalProductPolynomialFactory(polyColl,enumFunc)
basis = OrthogonalBasis(productPoly)
 
mesure = basis.getMeasure()
################################################################
# Compute the number of terms in the polynomial chaos expansion
# The degree of the polynomial chaos is fixed
p=5
P=factorielle(dim+p)/(factorielle(dim)*factorielle(p))
print "Taille du chaos polynomial pour p =", p, " : ", P
 
 
################################################################	
nb_calculs=range(0,3)
 
moyenne=[0]
variance=[0]
taille_plan=[0]
taille_base_finale=[0]
for i in nb_calculs:
	sampleSize = P+10*i
	taille_plan.append(sampleSize)
	print "Taille du plan d experiences : ", sampleSize
	regressionStrategy = RegressionStrategy(MonteCarloExperiment(sampleSize))
	#regressionStrategy = RegressionStrategy(LHSExperiment(sampleSize))		
 
	#Fixed Strategy
	basisSize = P
	#print "Taille de la base", basisSize
	fixedStrategy= FixedStrategy(basis,basisSize)
	#polynomialChaosAlgorithm = FunctionalChaosAlgorithm(g,Distribution(Modelproba),AdaptiveStrategy(fixedStrategy),ProjectionStrategy(regressionStrategy))
 
	#SequentialStrategy:	
	sequentialStrategy=SequentialStrategy(basis,basisSize)	
	#polynomialChaosAlgorithm = FunctionalChaosAlgorithm(g,Distribution(Modelproba),AdaptiveStrategy(sequentialStrategy),ProjectionStrategy(regressionStrategy))
 
	#CleaningStrategy:		
 
	maxIndex = 3*P # index of the last polynomial Psi that can be used
	basisSize_cleaning = P # here basisSize_cleaning is the maximum tolerated basis size
	significanceFactor = 1.0e-3	
	cleaningStrategy = CleaningStrategy(basis,maxIndex,basisSize_cleaning , significanceFactor,True)	
	polynomialChaosAlgorithm = FunctionalChaosAlgorithm(g,Distribution(Modelproba),AdaptiveStrategy(cleaningStrategy),ProjectionStrategy(regressionStrategy))
 
 
	polynomialChaosAlgorithm.run()
 
	#result	
	polynomialChaosResult = polynomialChaosAlgorithm.getResult()	
 
	#Get the polynomial chaos coefficients	
	coefficients = polynomialChaosResult.getCoefficients()	
 
	#Get the metamodel as a NumericalMathFunction	
	metaModel = polynomialChaosResult.getMetaModel()	
 
	#Get the basis		
	basisCollection = polynomialChaosResult.getBasis()	
	print "Taille de la base finale : ", polynomialChaosResult.getBasis().getSize()
 
	taille_base_finale.append(polynomialChaosResult.getBasis().getSize())
	#Get the distribution of variables U	
	mesure = polynomialChaosResult.getMeasure()	
 
	#Get the composedmodel which is the model of the reduced variables U	
	composedModel = polynomialChaosResult.getComposedModel()
 
 
	# Compute the moments from the coefficients
	print "Moyenne a partir des coefficients : " , polynomialChaosResult.getCoefficients()[0]
	print "Erreur sur la moyenne : " , (polynomialChaosResult.getCoefficients()[0]-3.5)/3.5
	moyenne.append(polynomialChaosResult.getCoefficients()[0])
	# Compute the variance from the coefficients
	variance_coeffs=polynomialChaosResult.getCoefficients().norm2()-polynomialChaosResult.getCoefficients()[0]**2
	print  "variance a partir des coefficients : " , variance_coeffs
	VarianceTheorique = (A**2)/8.0 + (B)*(pi**4)/5.0 + (B**2)*(pi**8)/18.0 +1.0/2.0
	print  "Erreur sur la variance : " , (variance_coeffs-VarianceTheorique)/VarianceTheorique,"\n"
	#print  "ecart-type a partir des coefficients : " , sqrt(variance_coeffs)
	variance.append(variance_coeffs)
 
fic_cv=open('cv_Ishigami_cleaning_5','w')
for i in nb_calculs:
	fic_cv.write(str(taille_plan[i+1])+' '+str(taille_base_finale[i+1])+' '+str(moyenne[i+1])+' '+str(variance[i+1])+'\n')
fic_cv.close()
 
 
print moyenne
print variance
# Theoretical variance
VarianceTheorique = (A**2)/8.0 + (B)*(pi**4)/5.0 + (B**2)*(pi**8)/18.0 +1.0/2.0
print "variance theorique : ", VarianceTheorique
 
fic_coeff=open('coefficients_Ishigami_cleaning_5','w')
nb_coeffs =  polynomialChaosResult.getCoefficients().getDimension()
affichage_coefficients=range(0,nb_coeffs)
for i in affichage_coefficients:
	#print enumFunc(i), polynomialChaosResult.getCoefficients()[i]
	fic_coeff.write(str(enumFunc(i))+' '+str(polynomialChaosResult.getCoefficients()[i])+'\n')
fic_coeff.close()

Trapezoidal distribution using truncation and mixture

# this is a script to create a trapezoid like distribution
# ========================================================
# version OT 0.12.3


from openturns import *
from openturns_viewer import *
from math import *


# The next four lines are the only ones the user has to fill in (change the values).
# warning: the values must respect the following order: a < b < c < d

a=1.0 # lower bound
b=2.0 # lower bound of the uniform distribution 
c=5.0 # upper bound of the uniform distribution 
d=7.0 # upper bound

######################################################
####      Trapezoid shape distribution       #########
######################################################

#Creation of the distributions:
triang=Triangular(a,(a+d)/2,d) # regular triangular distribution
myTruncatedTriang1 = TruncatedDistribution(Distribution(triang),b,TruncatedDistribution.UPPER) # truncated dist. on the right
myTruncatedTriang2=TruncatedDistribution(Distribution(triang),c,TruncatedDistribution.LOWER) # truncated dist. on the left
unif=Uniform(b,c) # uniform distribution

aCollection=DistributionCollection(3)  # mixture of these distributions: see the Use Cases Guide from the OT documentation

aCollection[0]=Distribution(myTruncatedTriang1)
aCollection[1]=Distribution(unif)
aCollection[2]=Distribution(myTruncatedTriang2)


aCollection[0].setWeight((b-a)/2) # weight of each distribution in order to have a continuous final distribution
aCollection[1].setWeight(c-b)
aCollection[2].setWeight((d-c)/2)
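# Note: with a=1, b=2, c=5, d=7 these weights are 0.5, 3 and 1, i.e. proportional to the
# areas of the left ramp, the plateau and the right ramp of the target trapezoid. Since a
# Mixture normalizes its weights, this choice is what makes the resulting density continuous
# (plateau height = 1/(0.5+3+1) ~ 0.222 here).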

myTrapezoid=Mixture(aCollection) #final distribution

OneHundredrealisations=myTrapezoid.getNumericalSample(1000000) # large sample of the obtained distribution
sampleHistOpt=VisualTest.DrawHistogram(OneHundredrealisations) # histogram of the obtained distribution
sampleHistOpt.draw("sampleHistOpt")
Show(sampleHistOpt)
ViewImage(sampleHistOpt.getBitmap())

First script

You can test OpenTURNS with this simple script:

from openturns import *
 
dist = Normal( 0, 1 )
sample = dist.getNumericalSample(10)
print sample

Now you have printed a sample!

Enjoy.