Transformation Script Cont.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
def predict(zbigarray):
    """Fit a linear model pressure ~ humidity and predict pressure for every row.

    Parameters:
        zbigarray: array-like whose full slice (``zbigarray[:]``) exposes the
            fields "date", "humidity_mean" and "pressure_mean".

    Returns:
        pandas.DataFrame with the columns 'Date', 'Actual' (the stored
        pressure) and 'Predicted' (the model output), one row per input row.
    """
    # Read the slice once; every field below comes from the same snapshot.
    data = zbigarray[:]
    humidity_all = data["humidity_mean"]
    pressure_all = data["pressure_mean"]

    # Rows with a zero in either field are excluded from training (presumably
    # zero marks a missing sample -- confirm against the resampling step).
    # A single shared mask keeps each humidity value paired with the pressure
    # of the same row; filtering the two columns independently would shift
    # the pairs, or give train_test_split inputs of different lengths.
    valid = (humidity_all != 0) & (pressure_all != 0)
    humidity = humidity_all[valid].reshape(-1, 1)
    pressure = pressure_all[valid].reshape(-1, 1)

    # Only the training part is used; the held-out 20% is discarded here.
    humid_train, _, pressure_train, _ = train_test_split(
        humidity, pressure, test_size=0.2, random_state=0)

    regressor = LinearRegression()
    regressor.fit(humid_train, pressure_train)

    # Predict for every input row (including the rows skipped for training)
    # so the output lines up one-to-one with the input array.
    pressure_pred = regressor.predict(humidity_all.reshape(-1, 1))

    return pd.DataFrame({'Date': data["date"],
                         'Actual': pressure_all,
                         'Predicted': pressure_pred.flatten()})
out_data_array = out_array["Data Array"]
in_data_array = in_array["Data Array"]
in_zbigarray = in_data_array.getArray()
if in_zbigarray is None:
return
if in_zbigarray.shape[0] == 0:
return
df = predict(in_zbigarray)
ndarray = df.to_records(convert_datetime64=False)
dtype = [('Date', '<M8[ns]'),('Actual', '<f8'), ('Predicted', '<f8')]
zbigarray = out_data_array.getArray()
if zbigarray is None:
zbigarray = out_data_array.initArray(shape=(0,), dtype=dtype)
zbigarray.append(ndarray)
else:
start_index = len(zbigarray)
array_len = len(ndarray)
if start_index < array_len:
zbigarray.append(ndarray[start_index:])
This script takes the data from the resampled Data Array as input and splits it into a training set and a test set.
It then learns on the training set and predicts air pressure based on humidity.
It stores the actual and the predicted values in the output array.
Don't forget to save the changes after editing the script.
Visualisation Script
%% md
# Prediction
<div class="prediction_plot_div" id="prediction_plot_div"></div>
%% fetch
js: jio.js
js: ndarray_bundle.js
js: wendelin.js
js: https://cdn.plot.ly/plotly-latest.min.js
%% js
// URL of the ERP5 HATEOAS endpoint, built from the host that serves this page.
hateoas_url = "https://" + window.location.host + "/erp5/web_site_module/default_wendelin_front/hateoas/";
// jIO storage of type "erp5" bound to that endpoint; the functions below use
// it for allDocs queries and, through `gadget`, for get/getAttachment calls.
jio = jIO.createJIO({
type: "erp5",
url: hateoas_url,
default_view_reference: "view"
});
// Object handed to wendelin.getArrayRawSlice() as its first argument.
// It exposes a settings lookup plus two thin wrappers around the `jio`
// storage (presumably the interface wendelin.js expects -- confirm there).
gadget = {
  // Resolves, through an RSVP queue, to `hateoas_url` when asked for the
  // "hateoas_url" setting and to undefined for any other name.
  getSetting: function (setting_name) {
    var queue = new RSVP.Queue();
    return queue.push(function () {
      return (setting_name == "hateoas_url") ? hateoas_url : undefined;
    });
  },

  // Delegates to jio.getAttachment with the arguments unchanged.
  jio_getAttachment: function (document_id, attachment_name, options) {
    return jio.getAttachment(document_id, attachment_name, options);
  },

  // Delegates to jio.get with the argument unchanged.
  jio_get: function (document_id) {
    return jio.get(document_id);
  }
};
// Column labels picked from the prediction array; their positions in this
// list are also used as row indices when the plot traces are built.
var prediction_label_list = ["Date", "Actual", "Predicted"];
// DOM element the plot is drawn into (the div with id "prediction_plot_div").
var prediction_graph = document.getElementById('prediction_plot_div');
// Start fetching and plotting; plot_prediction is a function declaration
// further down, so it is already available here through hoisting.
plot_prediction();
/**
 * Fetch the prediction traces and draw them into `prediction_graph`.
 *
 * @returns the queue returned by getPredictionData(), extended with the
 *          plotting step, so callers can chain on it or handle failures.
 */
function plot_prediction() {
  return getPredictionData()
    .push(function (data) {
      var layout = {
        barmode: 'stack',
        title: 'Predict Pressure Based on Humidity'
      };
      // newPlot replaces whatever is already drawn in the target element, so
      // re-running this cell does not pile duplicate traces onto the plot
      // (Plotly.plot appended to the existing plot and is deprecated).
      // Returning the result lets a plotting failure reject the queue.
      return Plotly.newPlot(prediction_graph, data, layout);
    });
}
/**
 * Locate the Data Array that holds the prediction results, fetch it and
 * convert it into two Plotly traces (actual and predicted values).
 *
 * The array is found in two steps: the started "Predicted Data" Data Analysis
 * Line for the "environment-predicted-array" resource is looked up first, then
 * the Data Array related to that line.
 *
 * @param start_date accepted for interface compatibility; currently unused.
 * @param stop_date  accepted for interface compatibility; currently unused.
 *        NOTE(review): the whole array is always fetched. Mapping a date
 *        window to row indices needs the sampling frequency of the array,
 *        which this script does not define.
 * @returns a queue resolving to [actual_trace, predicted_trace].
 * @throws (as a queue rejection) Error when no matching Data Analysis Line
 *         or Data Array is found.
 */
function getPredictionData(start_date, stop_date) {
  // Fetch the complete array: from the first row, with no upper bound.
  var start_index = 0,
    stop_index = undefined,
    date_position = prediction_label_list.indexOf("Date"),
    actual_position = prediction_label_list.indexOf("Actual"),
    predicted_position = prediction_label_list.indexOf("Predicted");

  // Collect the value stored under `key` from every row.
  function unpack(rows, key) {
    return rows.map(function (row) { return row[key]; });
  }

  // Return the id of the first search hit, or fail with a readable message
  // instead of a TypeError on `undefined.id` when the search is empty.
  function firstRowId(result, description) {
    var rows = result.data.rows;
    if (!rows || rows.length === 0) {
      throw new Error("getPredictionData: no " + description + " found");
    }
    return rows[0].id;
  }

  return jio.allDocs({
    query: 'portal_type:"Data Analysis Line" AND ' +
      'title: "Predicted Data" AND ' +
      'resource_reference:"environment-predicted-array" AND ' +
      'simulation_state:"started"'
  })
    .push(function (result) {
      var data_analysis_line_id = firstRowId(result, "Data Analysis Line");
      return jio.allDocs({
        query: 'portal_type:"Data Array" AND ' +
          'aggregate_related_relative_url:"' + data_analysis_line_id + '"'
      });
    })
    .push(function (result) {
      var array_id = firstRowId(result, "Data Array");
      return wendelin.getArrayRawSlice(gadget, array_id, start_index, stop_index);
    })
    .push(function (result) {
      // Pick each labelled column in turn and concatenate the unpacked data.
      var graph_data = [],
        i;
      for (i = 0; i < prediction_label_list.length; i += 1) {
        graph_data = graph_data.concat(
          nj.unpack(result.pick(null, prediction_label_list[i]))
        );
      }
      return graph_data;
    })
    .push(function (rows) {
      // Drop the rows whose first column is 0 before the date conversion.
      var kept_rows = rows.filter(function (row) {
        return row[0] != 0;
      });
      return wendelin.convertFirstColToDate(kept_rows);
    })
    .push(function (graph_data) {
      var dates = unpack(graph_data, date_position),
        actual_data = {
          type: "scattergl",
          mode: "markers",
          name: 'Real',
          x: dates,
          y: unpack(graph_data, actual_position),
          line: {color: '#17BECF'}
        },
        predicted_data = {
          type: "scattergl",
          mode: "markers",
          name: 'Predicted',
          x: dates,
          y: unpack(graph_data, predicted_position),
          line: {color: '#7F7F7F'}
        };
      return [actual_data, predicted_data];
    });
}
Copy/paste the code to your notebook and save the changes.
The script automatically finds the Data Array holding the prediction results, based on the Data Product, and plots the data using the Plotly library.