# Building a metamodel with Polynomial Chaos Expansion using a database

Hello all,

Recently, a colleague of mine asked me if it was possible to build a Polynomial Chaos Expansion metamodel in OpenTURNS with only a database of inputs versus outputs. After a little bit of scratching on the top of my head, I reached Regis and after a small chat with him, I obtained the following script. I have not tested it with such decoration (name of the files, etc.) so maybe it will not work straightforwardly but the core of the problem is treated here.

Enjoy!

```# -*- coding: iso-8859-15 -*-
from openturns import *
from math import *

#This script allows to build the metamodel by Polynomial Chaos Expansion using a database as input.
#Y = f(X)
#For the moment, it is mandatory to go through an intermediate variable Z
#The hypothesis is that each variable varies in an interval [a,b]
#Z = phi(X), such that each Zj is Uniform(0.,1.). phi is affine
#Y = g(Z)
#We do the PCE on g and finally f = metamodel o phi

#The input file in InputFile.txt

#this function takes a input in the database Z and returns the associated output
def norm2(X,Y):
norm = 0.
for i in range(Y.getDimension()):
norm+=(X[i]-Y[i])**2
return sqrt(norm)

#this function takes a input in the database Z and returns the associated output
class statModel(OpenTURNSPythonFunction):
def __init__(self):
OpenTURNSPythonFunction.__init__(self, globals()["dim"], 1)
def f(self,X):
for i in range(globals()["siz"]):
if norm2(X,globals()["Z"][i])<1.e-6:
v=i
break
try:
Y = globals()["spOutputTR"][v]
except:
Y = 0.
return [Y]

#this function is the affine funtion that transforms X into Z
class affineTransform(OpenTURNSPythonFunction):
def __init__(self):
OpenTURNSPythonFunction.__init__(self, globals()["dim"], globals()["dim"])
#change input dimension
def f(self,X):
interval = globals()["interv"]
Z = []
for j in range(len(X)):
Z.append(X[j] - interval[j])/(interval[j]-interval[j])
return Z

# Get training input from InputFile.txt
inVar = NumericalSample.ImportFromCSVFile("InputFile.txt")
inVar.setName("Input Variable")

#get the dimension and size of the sample
dim = inVar.getDimension()
siz = inVar.getSize()

# Define the intervals where the input variables are defined
#list of min values for each variable, to be specified by user
a1 = 0.
a2 = 0.
a3 = 0.
a4 = 0.
Min = [a1,a2,a3,a4]
#list of max values for each variable, to be specified by user
b1 = 1.
b2 = 2.
b3 = 3.
b4 = 4.
Max = [b1,b2,b3,b4]

interv = []
for i in range(len(Min)):
interv.append([Min[i],Max[i]])

# Create Z the transformed input vector
Z = NumericalSample(siz,dim)
for i in range(siz):
for j in range(dim):
Z[i][j] = (inVar[i][j] - interv[j])/(interv[j]-interv[j])

# Get training output from OutputFile.txt
outVar = NumericalSample.ImportFromCSVFile("OutputFile.txt")
outVar.setName("Output Variable")

# Create Polynomial Chaos Expansion of outVar = g(Z)
polyColl = PolynomialFamilyCollection(dim)

legendreFamily = LegendreFactory()
for i in range(dim):
polyColl[i] = OrthogonalUniVariatePolynomialFamily(legendreFamily)

multivariateBasis = OrthogonalProductPolynomialFactory(polyColl,EnumerateFunction(dim))
maximumConsideredTerms = 500
mostSignificant = 10
significanceFactor = 1.e-6
truncatureBasisStrategy = CleaningStrategy(OrthogonalBasis(multivariateBasis),maximumConsideredTerms, mostSignificant, significanceFactor, True)
evaluationCoeffStrategy = LeastSquaresStrategy(FixedExperiment(Z))
model = NumericalMathFunction(statModel())
polynomialChaosAlgorithm = FunctionalChaosAlgorithm(model, Distribution(dist), AdaptiveStrategy(truncatureBasisStrategy), ProjectionStrategy(evaluationCoeffStrategy))

polynomialChaosAlgorithm.run()
polynomialChaosResult = polynomialChaosAlgorithm.getResult()

coefficients = polynomialChaosResult.getCoefficients()
metaModel = polynomialChaosResult.getMetaModel()

# Get Response Surface : composition between Y = g(Z) and Z = aff(X) => our model is (g o aff)
proj = NumericalMathFunction(affineTransform())
modelfull = NumericalMathFunction(metaModel,proj)

# Get estimation input from InputCheck.txt
inpCheck = NumericalSample.ImportFromCSVFile("InputCheck.txt")
inpCheck.setName("Input variable for checking")

# Get estimation output
outCheck = modelfull(inpCheck)
outCheck.exportToCSVFile("OutputCheck.csv"," ")

fOutputCheck = open("OutputCheck.txt", 'w')

for i in range(outCheck.getSize()):
fOutputCheck.write("%f\n" % (outCheck[i]))

fOutputCheck.close()
```