/*
* Copyright (C) 2012 by Richard Brooks
* The file is part of the exstreamspeed library
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*/
#include
#include
#include
/*
* Example program to explore the basics of the core database API.
* The program does the following:
* 1) Define and populate a sales database that records unit and dollar
* sales by date, product and customer. Maintains aggregate unit sales by date.
* 2) Query database to compute the top 5 most profitable products
* for sales against a specific customer and range of dates.
* 3) Query database to compute total unit sales by date for a range of dates
*
* genschema() explores how to define a database definition. Uses es_mbd and
* es_mbc interfaces.
*
* genproduct() explores how to update a database root node using a string key field
* using the es_mb, es_mbb, es_mbi and es_sd interfaces:
*
* gensales() explores how to update a database node hierarchy. Shows how to
* construct new child nodes and how to update a database field incrementally
* using binding by-reference.
* Uses the es_mb, es_mbb, es_mbi, es_mbn and es_date interfaces.
*
* queryprofit() explores how to run complex queries against a database using
* a database aggregator, membership filter and sorted iterator.
* Uses es_mbm, es_mbd, es_mb, es_mba, es_mbb, es_mbs and es_sd interfaces.
*
* queryunitsales() explores how to run simpler queries against a database using
* a filtered iterator. Uses es_mb, es_mbm, es_mbb, es_mbi and es_date interfaces.
*/
/* constants driving size of database construction */
#define NUMPRODUCT 1000
#define NUMCUSTOMER 250
#define NUMDATES 600
#define NUMSALESPERDATE 10000
/* buffer sizes */
#define SDCHUNKSIZE 4096
#define EBUFSIZE 1024
#define QPLANSIZE 4096
void check( int ret )
{
/* simplistic error checking - abort on error */
if ( ret != ES_ERROR_NONE && ret != ES_ERROR_FAILTOFIND ) {
printf( "error=%d\n", ret );
abort();
}
}
struct es_mbd *genschema()
{
/* construct new empty schema */
struct es_mbd *def = es_mbd_new();
/* construct root class Product( ProductId, ProductCost ) that maintains
product production cost by product-id */
struct es_mbc *c_product;
check( es_mbc_new( def, "Product", 1000, ES_INDEX_NONE, NULL, &c_product ) );
check( es_mbc_addfld( c_product, "ProductId", ES_FLDTYPE_I16 ) );
check( es_mbc_addfld( c_product, "ProductCost", ES_FLDTYPE_F64 ) );
check( es_mbc_addfldidx( c_product, "ProductId" ) );
/* construct root class DateSales( DateId, TotalUnitSales ) that maintains
aggregate unit sales sales by date-id */
struct es_mbc *c_datesales;
check( es_mbc_new( def, "DateSales", 200, ES_INDEX_NONE, NULL, &c_datesales ) );
check( es_mbc_addfld( c_datesales, "DateId", ES_FLDTYPE_I32 ) );
check( es_mbc_addfld( c_datesales, "TotalUnitSales", ES_FLDTYPE_I32 ) );
check( es_mbc_addfldidx( c_datesales, "DateId" ) );
/* construct child class Sales(ProductId,CustomerId,UnitSales,DollarSales) that
maintains the break-down of unit and dollar sales by product and customer
corresponding to a particular date */
struct es_mbc *c_sales;
check( es_mbc_new( def, "Sales", 2000, ES_INDEX_SHARED, c_datesales, &c_sales ) );
check( es_mbc_addfld( c_sales, "ProductId", ES_FLDTYPE_I16 ) );
check( es_mbc_addfld( c_sales, "CustomerId", ES_FLDTYPE_I16 ) );
check( es_mbc_addfld( c_sales, "UnitSales", ES_FLDTYPE_I32 ) );
check( es_mbc_addfld( c_sales, "DollarSales", ES_FLDTYPE_F64 ) );
check( es_mbc_addfldidx( c_sales, "ProductId" ) );
check( es_mbc_addfldidx( c_sales, "CustomerId" ) );
return def;
}
void genproduct( struct es_mb *db, struct es_sd *s_prod )
{
/*
* Populate Product root node and string dictionary
*/
/* retrieve Product class deinition from database */
struct es_mbc *c_product;
check( es_mb_getclass( db, "Product", &c_product ) );
/* retrieve Product root node from database */
struct es_mbn *n_product;
check( es_mb_getnode( db, c_product, &n_product ) );
/* construct field-binding for Product class definition */
struct es_mbb *b_product;
check( es_mbb_new( c_product, &b_product ) );
/* construct iterator from Product field-binding */
struct es_mbi *i_product;
check( es_mbi_new( b_product, &i_product ) );
/* add a binding for ProductId */
uint16_t f_productid;
check( es_mbb_i16_v( b_product, "ProductId", &f_productid ) );
/* add a binding for ProductCost */
double f_productcost;
check( es_mbb_f64_v( b_product, "ProductCost", &f_productcost ) );
/* generate NUMPRODUCT products */
char prodname[256];
unsigned i;
for( i=0; i != NUMPRODUCT; ++i ) {
/* generate product name */
sprintf( prodname, "Product_%03d", i );
/* add to string dictionary - ProductId is assigned the string-id */
unsigned idx;
es_sd_add( s_prod, &idx, prodname );
f_productid = idx;
/* derive production cost */
f_productcost = (double)(f_productid % 100);
/* update Product node with ProductId and ProductCost */
check( es_mbi_findupdate( i_product, n_product ) );
}
/* clean-up */
es_mbb_delete( b_product );
es_mbi_delete( i_product );
}
void gensales( struct es_mb *db )
{
/*
* Populate DateSales and Sales
*/
printf( "Generating %d dates x %d sales => %d random-access updates\n\n",
NUMDATES, NUMSALESPERDATE, NUMDATES*(1+NUMSALESPERDATE) );
/* retrieve DateSales and Sales class definitions */
struct es_mbc *c_datesales, *c_sales;
check( es_mb_getclass( db, "DateSales", &c_datesales ) );
check( es_mb_getclass( db, "Sales", &c_sales ) );
/* retrieve DateSales root node from database */
struct es_mbn *n_datesales;
check( es_mb_getnode( db, c_datesales, &n_datesales ) );
/* construct field-bindings from DateSales and Sales class definition */
struct es_mbb *b_datesales, *b_sales;
check( es_mbb_new( c_datesales, &b_datesales ) );
check( es_mbb_new( c_sales, &b_sales ) );
/* construct iterators from DateSales and Sales field-bindings */
struct es_mbi *i_datesales, *i_sales;
check( es_mbi_new( b_datesales, &i_datesales ) );
check( es_mbi_new( b_sales, &i_sales ) );
/* add field-bindings for DateId */
uint32_t f_dateid;
check( es_mbb_i32_v( b_datesales, "DateId", &f_dateid ) );
/* add binding by-reference for TotalUnitSales as we're going to update this
derived aggregate field incrementally */
uint32_t *f_totalunitsales;
check( es_mbb_i32_r( b_datesales, "TotalUnitSales", &f_totalunitsales ) );
/* add field-binding by-reference for Sales child node */
struct es_mbn **f_sales;
check( es_mbb_mbn_r( b_datesales, "Sales", &f_sales ) );
/* add field bindings for ProductId, CustomerId, UnitSales and DollarSales */
uint16_t f_productid, f_customerid;
uint32_t f_unitsales;
double f_dollarsales;
check( es_mbb_i16_v( b_sales, "ProductId", &f_productid ) );
check( es_mbb_i16_v( b_sales, "CustomerId", &f_customerid ) );
check( es_mbb_i32_v( b_sales, "UnitSales", &f_unitsales ) );
check( es_mbb_f64_v( b_sales, "DollarSales", &f_dollarsales ) );
/* loop over NUMDATES calendar days starting from 4/1/2007 */
f_dateid = es_date_getid_i( 2007, 4, 14 );
unsigned i;
for( i=0; i != NUMDATES; ++i, ++f_dateid ) {
/* update row in DateSales */
check( es_mbi_findupdate( i_datesales, n_datesales ) );
/* construct Sales child node if missing */
if ( ! *f_sales ) {
check( es_mbn_new( db, c_sales, f_sales ) );
}
/* each date has the same set of NUMSALESPERDATE combinations of ProductId
and CustomerId, but different UnitSales and DollarSales */
unsigned i;
uint32_t unitsale = f_dateid%77;
f_productid = 17;
f_customerid = 43;
for( i=0; i!= NUMSALESPERDATE; ++i ) {
/* construct ProductId, CustomerId, UnitSales, DollarSales */
if ( ++f_productid == NUMPRODUCT ) {
f_productid = 1;
}
if ( ++f_customerid == NUMCUSTOMER ) {
f_customerid = 1;
}
f_unitsales = unitsale+i%49;
f_dollarsales = f_unitsales*(13.0+i%17);
/* update aggregate unit sales for this DateId */
*f_totalunitsales += f_unitsales;
/* update this Sales child */
check( es_mbi_findupdate( i_sales, *f_sales ) );
}
}
/* clean-up */
es_mbb_delete( b_datesales );
es_mbi_delete( i_datesales );
es_mbb_delete( b_sales );
es_mbi_delete( i_sales );
}
void queryprofit( struct es_mb *db, struct es_sd *s_product,
int lowdateid, int highdateid, int customerid )
{
/*
* Define and execute a database aggregator to run query
*/
printf( "Compute top 5 most profitable ProductIds for CustomerId=%d from %d to %d\n\n",
customerid, es_date_getcomp( lowdateid ), es_date_getcomp( highdateid ) );
/* construct membership filter for DateId */
struct es_mbm *m_dateid;
check( es_mbm_new( ES_MEMBERSHIP_IN, &m_dateid ) );
check( es_mbm_addrange( m_dateid, lowdateid, highdateid ) );
/* construct membership filter for CustomerId */
struct es_mbm *m_customerid;
check( es_mbm_new( ES_MEMBERSHIP_IN, &m_customerid ) );
check( es_mbm_add( m_customerid, customerid ) );
/* define result database definition and class to store query results */
struct es_mbd *rdef = es_mbd_new();
struct es_mbc *c_res;
check( es_mbc_new( rdef, "Result", 200, ES_INDEX_PRIVATE, NULL, &c_res ) );
/* construct query aggregator against input database definition */
struct es_mba *agg;
char ebuf[EBUFSIZE], queryplan[QPLANSIZE];
struct es_mbd *def = es_mb_getdefinition( db );
check( es_mba_new( def, &agg, ebuf, EBUFSIZE ) );
/* add ProductId as key of query result */
check( es_mba_addkey( agg, "ProductId" ) );
/* add membership filters for DateId and CustomerId to aggregator */
check( es_mba_addmembership( agg, "DateId", m_dateid ) );
check( es_mba_addmembership( agg, "CustomerId", m_customerid ) );
/* define derived value Profit */
check( es_mba_addformula( agg, "Profit", "DollarSales - UnitSales * ProductCost",
ES_FLDTYPE_F64, ebuf, EBUFSIZE ) );
/* add Profit as an aggregate result field */
check( es_mba_addresult( agg, "Profit", ES_AGGREGATE_SUM ) );
/* prepare and display the query plan for the aggregator */
check( es_mba_prepare( agg, rdef, "Result", queryplan, QPLANSIZE ) );
printf( "Aggregator Query Plan:\n%s\n", queryplan );
/* construct an empty result database with which to fill query results */
struct es_mb *rdb;
check( es_mb_new( rdef, &rdb ) );
/* execute query against input and result databases */
check( es_mba_exec( agg, db, rdb ) );
/*
* print top 5 most profitable products
*/
/* construct binding for ProductId and Profit fields in Result class */
struct es_mbb *b_res;
struct es_mbn *n_res;
uint16_t f_productid;
double f_profit;
check( es_mb_getnode( rdb, c_res, &n_res ) );
check( es_mbb_new( c_res, &b_res ) );
check( es_mbb_i16_v( b_res, "ProductId", &f_productid ) );
check( es_mbb_f64_v( b_res, "Profit", &f_profit ) );
/* construct sorted iterator. Use ES_ORDER_BOTTOM since we only want the
first 5 entries and don't want to sort the entire node */
struct es_mbs *s_res;
check( es_mbs_new( b_res, ES_ORDER_BOTTOM, &s_res ) );
check( es_mbs_add( s_res, "Profit" ) );
/* extract first five entries */
printf("Rank,Product,Profit\n");
int i,r = es_mbs_beginfetch( s_res, n_res );
for( i=0; i!=5 && r == ES_ERROR_NONE; ++i ) {
/* get name of product from string dictionary by id */
const char *prodname = es_sd_getstr( s_product, f_productid );
printf( "%d,%s,%f\n", i+1,prodname, f_profit );
r = es_mbs_incfetch( s_res );
}
printf( "\n" );
/* clean-up aggregator and query results */
es_mbb_delete( b_res );
es_mbs_delete( s_res );
es_mbm_delete( m_dateid );
es_mbm_delete( m_customerid );
es_mb_delete( rdb );
es_mbd_delete( rdef );
es_mba_delete( agg );
}
void queryprofitq( struct es_mb *db, struct es_sd *s_product,
int lowdateid, int highdateid, int customerid )
{
/*
* Define and execute a database query expression
*/
printf( "Compute top 5 most profitable ProductIds for CustomerId=%d from %d to %d\n\n",
customerid, es_date_getcomp( lowdateid ), es_date_getcomp( highdateid ) );
/* construct query aggregator against input database definition */
struct es_mbq *qry;
check( es_mbq_new( &qry ) );
/* define membership query string for date filter */
check( es_mbq_add( qry, "mbm( id=DateF memtype=in " ) );
check( es_mbq_add( qry, "addrange( low=" ));
check( es_mbq_addint( qry, lowdateid ) );
check( es_mbq_add( qry, " high=" ) );
check( es_mbq_addint( qry, highdateid ) );
check( es_mbq_add( qry, "))\n" ) );
/* define membership query string for customer filter */
check( es_mbq_add( qry, "mbm( id=CustomerF memtype=in " ) );
check( es_mbq_add( qry, "add( value=" ));
check( es_mbq_addint( qry, customerid ) );
check( es_mbq_add( qry, "))\n" ) );
/* define result mbc */
check( es_mbq_add( qry, "mbc( id=Result batch=200 index=private)\n" ) );
/* define aggregation query */
check( es_mbq_add( qry, "mba( refmbc=Result " ) );
check( es_mbq_add( qry, "addmembership( field=DateId refmbm=DateF ) " ) );
check( es_mbq_add( qry, "addmembership( field=CustomerId refmbm=CustomerF ) " ) );
check( es_mbq_add( qry, "addformula( field=Profit type=f64 " ) );
check( es_mbq_add( qry, " def=\"DollarSales - UnitSales * ProductCost\") " ) );
check( es_mbq_add( qry, "addkey( field=ProductId ) " ) );
check( es_mbq_add( qry, "addresult( field=Profit aggtype=sum ))" ) );
/* prepare and print query plan */
char ebuf[EBUFSIZE], queryplan[QPLANSIZE];
struct es_mbd *def = es_mb_getdefinition( db );
int pres = es_mbq_prepare( qry, def, NULL, queryplan, QPLANSIZE, ebuf, EBUFSIZE );
if ( pres != ES_ERROR_NONE ) {
printf( "error preparing query code=%d, errmsg=%s\n", pres, ebuf );
abort();
}
printf( "Aggregator Query Plan Derived From Query Language Expression:\n%s\n",
queryplan );
/* execute query */
struct es_mb *rdb;
check( es_mbq_exec( qry, db, &rdb ) );
/*
* print top 5 most profitable products
*/
/* construct binding for ProductId and Profit fields in Result class */
struct es_mbc *c_res;
struct es_mbb *b_res;
struct es_mbn *n_res;
uint16_t f_productid;
double f_profit;
check( es_mb_getclass( rdb, "Result", &c_res ) );
check( es_mb_getnode( rdb, c_res, &n_res ) );
check( es_mbb_new( c_res, &b_res ) );
check( es_mbb_i16_v( b_res, "ProductId", &f_productid ) );
check( es_mbb_f64_v( b_res, "Profit", &f_profit ) );
/* construct sorted iterator. Use ES_ORDER_BOTTOM since we only want the
first 5 entries and don't want to sort the entire node */
struct es_mbs *s_res;
check( es_mbs_new( b_res, ES_ORDER_BOTTOM, &s_res ) );
check( es_mbs_add( s_res, "Profit" ) );
/* extract first five entries */
printf("Rank,Product,Profit\n");
int i,r = es_mbs_beginfetch( s_res, n_res );
for( i=0; i!=5 && r == ES_ERROR_NONE; ++i ) {
/* get name of product from string dictionary by id */
const char *prodname = es_sd_getstr( s_product, f_productid );
printf( "%d,%s,%f\n", i+1,prodname, f_profit );
r = es_mbs_incfetch( s_res );
}
printf( "\n" );
/* clean-up aggregator and query results */
es_mbb_delete( b_res );
es_mbs_delete( s_res );
es_mb_delete( rdb );
es_mbq_delete( qry );
}
void queryunitsales( struct es_mb *db, int lowdateid, int highdateid )
{
printf( "Compute TotalUnitSales by date from %d to %d\n\n",
es_date_getcomp( lowdateid ), es_date_getcomp( highdateid ) );
/* retrieve DateSales class definition */
struct es_mbc *c_datesales;
check( es_mb_getclass( db, "DateSales", &c_datesales ) );
/* retrieve DateSales root node from database */
struct es_mbn *n_datesales;
check( es_mb_getnode( db, c_datesales, &n_datesales ) );
/* construct field-bindings and iterator for DateSales */
struct es_mbb *b_datesales;
struct es_mbi *i_datesales;
uint32_t f_dateid, f_totalunitsales;
check( es_mbb_new( c_datesales, &b_datesales ) );
check( es_mbi_new( b_datesales, &i_datesales ) );
check( es_mbb_i32_v( b_datesales, "DateId", &f_dateid ) );
check( es_mbb_i32_v( b_datesales, "TotalUnitSales", &f_totalunitsales ) );
/* construct membership filter for DateId */
struct es_mbm *m_dateid;
check( es_mbm_new( ES_MEMBERSHIP_IN, &m_dateid ) );
check( es_mbm_addrange( m_dateid, lowdateid, highdateid ) );
/* add membership filter to DateSales field-binding */
check( es_mbb_mbm( b_datesales, "DateId", m_dateid ) );
/* iterator for DateSales node */
printf("Date,TotalUnitSales\n");
int r;
for( r = es_mbi_beginfetch( i_datesales, n_datesales );
r == ES_ERROR_NONE; r = es_mbi_incfetch( i_datesales ) ) {
printf( "%02d/%02d/%04d,%d\n",
es_date_getmonth( f_dateid ),
es_date_getday( f_dateid ),
es_date_getyear( f_dateid ),
f_totalunitsales );
}
/* clean-up */
es_mbm_delete( m_dateid );
es_mbb_delete( b_datesales );
es_mbi_delete( i_datesales );
}
int main( int argc, char **argv )
{
/* initalize date calculator */
es_date_init( 2007, 2010 );
/* construct database definition */
struct es_mbd *def = genschema();
/* construct empty database from definition */
struct es_mb *db;
check( es_mb_new( def, &db ) );
/* populate Product root node and string dictionary of product names */
struct es_sd *s_product = es_sd_new( SDCHUNKSIZE, ES_SD_STORESTRINGS );
genproduct( db, s_product );
/* populate DateSales root and child nodes */
gensales( db );
unsigned lowdate,highdate;
/* run query to determine the 5 most profitable products for sales
to a particular customer and range of dates (in this case the whole of 2008) */
lowdate = es_date_getid_i( 2008, 1, 1 );
highdate = es_date_getid_i( 2008, 12, 31 );
queryprofit( db, s_product, lowdate, highdate, 20 );
queryprofitq( db, s_product, lowdate, highdate, 20 );
/* print total unit sales for dates in some range (in this case April 2007) */
lowdate = es_date_getid_i( 2007, 4, 1 );
highdate = es_date_getid_i( 2007, 4, 30 );
queryunitsales( db, lowdate, highdate );
/* delete database and definition */
es_sd_delete( s_product );
es_mb_delete( db );
es_mbd_delete( def );
es_date_destroy();
return 0;
}