/* * Copyright (C) 2012 by Richard Brooks * The file is part of the exstreamspeed library * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include /* * Example program to explore the basics of the core database API. * The program does the following: * 1) Define and populate a sales database that records unit and dollar * sales by date, product and customer. Maintains aggregate unit sales by date. * 2) Query database to compute the top 5 most profitable products * for sales against a specific customer and range of dates. * 3) Query database to compute total unit sales by date for a range of dates * * genschema() explores how to define a database definition. Uses es_mbd and * es_mbc interfaces. * * genproduct() explores how to update a database root node using a string key field * using the es_mb, es_mbb, es_mbi and es_sd interfaces: * * gensales() explores how to update a database node hierarchy. Shows how to * construct new child nodes and how to update a database field incrementally * using binding by-reference. * Uses the es_mb, es_mbb, es_mbi, es_mbn and es_date interfaces. * * queryprofit() explores how to run complex queries against a database using * a database aggregator, membership filter and sorted iterator. * Uses es_mbm, es_mbd, es_mb, es_mba, es_mbb, es_mbs and es_sd interfaces. * * queryunitsales() explores how to run simpler queries against a database using * a filtered iterator. Uses es_mb, es_mbm, es_mbb, es_mbi and es_date interfaces. */ /* constants driving size of database construction */ #define NUMPRODUCT 1000 #define NUMCUSTOMER 250 #define NUMDATES 600 #define NUMSALESPERDATE 10000 /* buffer sizes */ #define SDCHUNKSIZE 4096 #define EBUFSIZE 1024 #define QPLANSIZE 4096 void check( int ret ) { /* simplistic error checking - abort on error */ if ( ret != ES_ERROR_NONE && ret != ES_ERROR_FAILTOFIND ) { printf( "error=%d\n", ret ); abort(); } } struct es_mbd *genschema() { /* construct new empty schema */ struct es_mbd *def = es_mbd_new(); /* construct root class Product( ProductId, ProductCost ) that maintains product production cost by product-id */ struct es_mbc *c_product; check( es_mbc_new( def, "Product", 1000, ES_INDEX_NONE, NULL, &c_product ) ); check( es_mbc_addfld( c_product, "ProductId", ES_FLDTYPE_I16 ) ); check( es_mbc_addfld( c_product, "ProductCost", ES_FLDTYPE_F64 ) ); check( es_mbc_addfldidx( c_product, "ProductId" ) ); /* construct root class DateSales( DateId, TotalUnitSales ) that maintains aggregate unit sales sales by date-id */ struct es_mbc *c_datesales; check( es_mbc_new( def, "DateSales", 200, ES_INDEX_NONE, NULL, &c_datesales ) ); check( es_mbc_addfld( c_datesales, "DateId", ES_FLDTYPE_I32 ) ); check( es_mbc_addfld( c_datesales, "TotalUnitSales", ES_FLDTYPE_I32 ) ); check( es_mbc_addfldidx( c_datesales, "DateId" ) ); /* construct child class Sales(ProductId,CustomerId,UnitSales,DollarSales) that maintains the break-down of unit and dollar sales by product and customer corresponding to a particular date */ struct es_mbc *c_sales; check( es_mbc_new( def, "Sales", 2000, ES_INDEX_SHARED, c_datesales, &c_sales ) ); check( es_mbc_addfld( c_sales, "ProductId", ES_FLDTYPE_I16 ) ); check( es_mbc_addfld( c_sales, "CustomerId", ES_FLDTYPE_I16 ) ); check( es_mbc_addfld( c_sales, "UnitSales", ES_FLDTYPE_I32 ) ); check( es_mbc_addfld( c_sales, "DollarSales", ES_FLDTYPE_F64 ) ); check( es_mbc_addfldidx( c_sales, "ProductId" ) ); check( es_mbc_addfldidx( c_sales, "CustomerId" ) ); return def; } void genproduct( struct es_mb *db, struct es_sd *s_prod ) { /* * Populate Product root node and string dictionary */ /* retrieve Product class deinition from database */ struct es_mbc *c_product; check( es_mb_getclass( db, "Product", &c_product ) ); /* retrieve Product root node from database */ struct es_mbn *n_product; check( es_mb_getnode( db, c_product, &n_product ) ); /* construct field-binding for Product class definition */ struct es_mbb *b_product; check( es_mbb_new( c_product, &b_product ) ); /* construct iterator from Product field-binding */ struct es_mbi *i_product; check( es_mbi_new( b_product, &i_product ) ); /* add a binding for ProductId */ uint16_t f_productid; check( es_mbb_i16_v( b_product, "ProductId", &f_productid ) ); /* add a binding for ProductCost */ double f_productcost; check( es_mbb_f64_v( b_product, "ProductCost", &f_productcost ) ); /* generate NUMPRODUCT products */ char prodname[256]; unsigned i; for( i=0; i != NUMPRODUCT; ++i ) { /* generate product name */ sprintf( prodname, "Product_%03d", i ); /* add to string dictionary - ProductId is assigned the string-id */ unsigned idx; es_sd_add( s_prod, &idx, prodname ); f_productid = idx; /* derive production cost */ f_productcost = (double)(f_productid % 100); /* update Product node with ProductId and ProductCost */ check( es_mbi_findupdate( i_product, n_product ) ); } /* clean-up */ es_mbb_delete( b_product ); es_mbi_delete( i_product ); } void gensales( struct es_mb *db ) { /* * Populate DateSales and Sales */ printf( "Generating %d dates x %d sales => %d random-access updates\n\n", NUMDATES, NUMSALESPERDATE, NUMDATES*(1+NUMSALESPERDATE) ); /* retrieve DateSales and Sales class definitions */ struct es_mbc *c_datesales, *c_sales; check( es_mb_getclass( db, "DateSales", &c_datesales ) ); check( es_mb_getclass( db, "Sales", &c_sales ) ); /* retrieve DateSales root node from database */ struct es_mbn *n_datesales; check( es_mb_getnode( db, c_datesales, &n_datesales ) ); /* construct field-bindings from DateSales and Sales class definition */ struct es_mbb *b_datesales, *b_sales; check( es_mbb_new( c_datesales, &b_datesales ) ); check( es_mbb_new( c_sales, &b_sales ) ); /* construct iterators from DateSales and Sales field-bindings */ struct es_mbi *i_datesales, *i_sales; check( es_mbi_new( b_datesales, &i_datesales ) ); check( es_mbi_new( b_sales, &i_sales ) ); /* add field-bindings for DateId */ uint32_t f_dateid; check( es_mbb_i32_v( b_datesales, "DateId", &f_dateid ) ); /* add binding by-reference for TotalUnitSales as we're going to update this derived aggregate field incrementally */ uint32_t *f_totalunitsales; check( es_mbb_i32_r( b_datesales, "TotalUnitSales", &f_totalunitsales ) ); /* add field-binding by-reference for Sales child node */ struct es_mbn **f_sales; check( es_mbb_mbn_r( b_datesales, "Sales", &f_sales ) ); /* add field bindings for ProductId, CustomerId, UnitSales and DollarSales */ uint16_t f_productid, f_customerid; uint32_t f_unitsales; double f_dollarsales; check( es_mbb_i16_v( b_sales, "ProductId", &f_productid ) ); check( es_mbb_i16_v( b_sales, "CustomerId", &f_customerid ) ); check( es_mbb_i32_v( b_sales, "UnitSales", &f_unitsales ) ); check( es_mbb_f64_v( b_sales, "DollarSales", &f_dollarsales ) ); /* loop over NUMDATES calendar days starting from 4/1/2007 */ f_dateid = es_date_getid_i( 2007, 4, 14 ); unsigned i; for( i=0; i != NUMDATES; ++i, ++f_dateid ) { /* update row in DateSales */ check( es_mbi_findupdate( i_datesales, n_datesales ) ); /* construct Sales child node if missing */ if ( ! *f_sales ) { check( es_mbn_new( db, c_sales, f_sales ) ); } /* each date has the same set of NUMSALESPERDATE combinations of ProductId and CustomerId, but different UnitSales and DollarSales */ unsigned i; uint32_t unitsale = f_dateid%77; f_productid = 17; f_customerid = 43; for( i=0; i!= NUMSALESPERDATE; ++i ) { /* construct ProductId, CustomerId, UnitSales, DollarSales */ if ( ++f_productid == NUMPRODUCT ) { f_productid = 1; } if ( ++f_customerid == NUMCUSTOMER ) { f_customerid = 1; } f_unitsales = unitsale+i%49; f_dollarsales = f_unitsales*(13.0+i%17); /* update aggregate unit sales for this DateId */ *f_totalunitsales += f_unitsales; /* update this Sales child */ check( es_mbi_findupdate( i_sales, *f_sales ) ); } } /* clean-up */ es_mbb_delete( b_datesales ); es_mbi_delete( i_datesales ); es_mbb_delete( b_sales ); es_mbi_delete( i_sales ); } void queryprofit( struct es_mb *db, struct es_sd *s_product, int lowdateid, int highdateid, int customerid ) { /* * Define and execute a database aggregator to run query */ printf( "Compute top 5 most profitable ProductIds for CustomerId=%d from %d to %d\n\n", customerid, es_date_getcomp( lowdateid ), es_date_getcomp( highdateid ) ); /* construct membership filter for DateId */ struct es_mbm *m_dateid; check( es_mbm_new( ES_MEMBERSHIP_IN, &m_dateid ) ); check( es_mbm_addrange( m_dateid, lowdateid, highdateid ) ); /* construct membership filter for CustomerId */ struct es_mbm *m_customerid; check( es_mbm_new( ES_MEMBERSHIP_IN, &m_customerid ) ); check( es_mbm_add( m_customerid, customerid ) ); /* define result database definition and class to store query results */ struct es_mbd *rdef = es_mbd_new(); struct es_mbc *c_res; check( es_mbc_new( rdef, "Result", 200, ES_INDEX_PRIVATE, NULL, &c_res ) ); /* construct query aggregator against input database definition */ struct es_mba *agg; char ebuf[EBUFSIZE], queryplan[QPLANSIZE]; struct es_mbd *def = es_mb_getdefinition( db ); check( es_mba_new( def, &agg, ebuf, EBUFSIZE ) ); /* add ProductId as key of query result */ check( es_mba_addkey( agg, "ProductId" ) ); /* add membership filters for DateId and CustomerId to aggregator */ check( es_mba_addmembership( agg, "DateId", m_dateid ) ); check( es_mba_addmembership( agg, "CustomerId", m_customerid ) ); /* define derived value Profit */ check( es_mba_addformula( agg, "Profit", "DollarSales - UnitSales * ProductCost", ES_FLDTYPE_F64, ebuf, EBUFSIZE ) ); /* add Profit as an aggregate result field */ check( es_mba_addresult( agg, "Profit", ES_AGGREGATE_SUM ) ); /* prepare and display the query plan for the aggregator */ check( es_mba_prepare( agg, rdef, "Result", queryplan, QPLANSIZE ) ); printf( "Aggregator Query Plan:\n%s\n", queryplan ); /* construct an empty result database with which to fill query results */ struct es_mb *rdb; check( es_mb_new( rdef, &rdb ) ); /* execute query against input and result databases */ check( es_mba_exec( agg, db, rdb ) ); /* * print top 5 most profitable products */ /* construct binding for ProductId and Profit fields in Result class */ struct es_mbb *b_res; struct es_mbn *n_res; uint16_t f_productid; double f_profit; check( es_mb_getnode( rdb, c_res, &n_res ) ); check( es_mbb_new( c_res, &b_res ) ); check( es_mbb_i16_v( b_res, "ProductId", &f_productid ) ); check( es_mbb_f64_v( b_res, "Profit", &f_profit ) ); /* construct sorted iterator. Use ES_ORDER_BOTTOM since we only want the first 5 entries and don't want to sort the entire node */ struct es_mbs *s_res; check( es_mbs_new( b_res, ES_ORDER_BOTTOM, &s_res ) ); check( es_mbs_add( s_res, "Profit" ) ); /* extract first five entries */ printf("Rank,Product,Profit\n"); int i,r = es_mbs_beginfetch( s_res, n_res ); for( i=0; i!=5 && r == ES_ERROR_NONE; ++i ) { /* get name of product from string dictionary by id */ const char *prodname = es_sd_getstr( s_product, f_productid ); printf( "%d,%s,%f\n", i+1,prodname, f_profit ); r = es_mbs_incfetch( s_res ); } printf( "\n" ); /* clean-up aggregator and query results */ es_mbb_delete( b_res ); es_mbs_delete( s_res ); es_mbm_delete( m_dateid ); es_mbm_delete( m_customerid ); es_mb_delete( rdb ); es_mbd_delete( rdef ); es_mba_delete( agg ); } void queryprofitq( struct es_mb *db, struct es_sd *s_product, int lowdateid, int highdateid, int customerid ) { /* * Define and execute a database query expression */ printf( "Compute top 5 most profitable ProductIds for CustomerId=%d from %d to %d\n\n", customerid, es_date_getcomp( lowdateid ), es_date_getcomp( highdateid ) ); /* construct query aggregator against input database definition */ struct es_mbq *qry; check( es_mbq_new( &qry ) ); /* define membership query string for date filter */ check( es_mbq_add( qry, "mbm( id=DateF memtype=in " ) ); check( es_mbq_add( qry, "addrange( low=" )); check( es_mbq_addint( qry, lowdateid ) ); check( es_mbq_add( qry, " high=" ) ); check( es_mbq_addint( qry, highdateid ) ); check( es_mbq_add( qry, "))\n" ) ); /* define membership query string for customer filter */ check( es_mbq_add( qry, "mbm( id=CustomerF memtype=in " ) ); check( es_mbq_add( qry, "add( value=" )); check( es_mbq_addint( qry, customerid ) ); check( es_mbq_add( qry, "))\n" ) ); /* define result mbc */ check( es_mbq_add( qry, "mbc( id=Result batch=200 index=private)\n" ) ); /* define aggregation query */ check( es_mbq_add( qry, "mba( refmbc=Result " ) ); check( es_mbq_add( qry, "addmembership( field=DateId refmbm=DateF ) " ) ); check( es_mbq_add( qry, "addmembership( field=CustomerId refmbm=CustomerF ) " ) ); check( es_mbq_add( qry, "addformula( field=Profit type=f64 " ) ); check( es_mbq_add( qry, " def=\"DollarSales - UnitSales * ProductCost\") " ) ); check( es_mbq_add( qry, "addkey( field=ProductId ) " ) ); check( es_mbq_add( qry, "addresult( field=Profit aggtype=sum ))" ) ); /* prepare and print query plan */ char ebuf[EBUFSIZE], queryplan[QPLANSIZE]; struct es_mbd *def = es_mb_getdefinition( db ); int pres = es_mbq_prepare( qry, def, NULL, queryplan, QPLANSIZE, ebuf, EBUFSIZE ); if ( pres != ES_ERROR_NONE ) { printf( "error preparing query code=%d, errmsg=%s\n", pres, ebuf ); abort(); } printf( "Aggregator Query Plan Derived From Query Language Expression:\n%s\n", queryplan ); /* execute query */ struct es_mb *rdb; check( es_mbq_exec( qry, db, &rdb ) ); /* * print top 5 most profitable products */ /* construct binding for ProductId and Profit fields in Result class */ struct es_mbc *c_res; struct es_mbb *b_res; struct es_mbn *n_res; uint16_t f_productid; double f_profit; check( es_mb_getclass( rdb, "Result", &c_res ) ); check( es_mb_getnode( rdb, c_res, &n_res ) ); check( es_mbb_new( c_res, &b_res ) ); check( es_mbb_i16_v( b_res, "ProductId", &f_productid ) ); check( es_mbb_f64_v( b_res, "Profit", &f_profit ) ); /* construct sorted iterator. Use ES_ORDER_BOTTOM since we only want the first 5 entries and don't want to sort the entire node */ struct es_mbs *s_res; check( es_mbs_new( b_res, ES_ORDER_BOTTOM, &s_res ) ); check( es_mbs_add( s_res, "Profit" ) ); /* extract first five entries */ printf("Rank,Product,Profit\n"); int i,r = es_mbs_beginfetch( s_res, n_res ); for( i=0; i!=5 && r == ES_ERROR_NONE; ++i ) { /* get name of product from string dictionary by id */ const char *prodname = es_sd_getstr( s_product, f_productid ); printf( "%d,%s,%f\n", i+1,prodname, f_profit ); r = es_mbs_incfetch( s_res ); } printf( "\n" ); /* clean-up aggregator and query results */ es_mbb_delete( b_res ); es_mbs_delete( s_res ); es_mb_delete( rdb ); es_mbq_delete( qry ); } void queryunitsales( struct es_mb *db, int lowdateid, int highdateid ) { printf( "Compute TotalUnitSales by date from %d to %d\n\n", es_date_getcomp( lowdateid ), es_date_getcomp( highdateid ) ); /* retrieve DateSales class definition */ struct es_mbc *c_datesales; check( es_mb_getclass( db, "DateSales", &c_datesales ) ); /* retrieve DateSales root node from database */ struct es_mbn *n_datesales; check( es_mb_getnode( db, c_datesales, &n_datesales ) ); /* construct field-bindings and iterator for DateSales */ struct es_mbb *b_datesales; struct es_mbi *i_datesales; uint32_t f_dateid, f_totalunitsales; check( es_mbb_new( c_datesales, &b_datesales ) ); check( es_mbi_new( b_datesales, &i_datesales ) ); check( es_mbb_i32_v( b_datesales, "DateId", &f_dateid ) ); check( es_mbb_i32_v( b_datesales, "TotalUnitSales", &f_totalunitsales ) ); /* construct membership filter for DateId */ struct es_mbm *m_dateid; check( es_mbm_new( ES_MEMBERSHIP_IN, &m_dateid ) ); check( es_mbm_addrange( m_dateid, lowdateid, highdateid ) ); /* add membership filter to DateSales field-binding */ check( es_mbb_mbm( b_datesales, "DateId", m_dateid ) ); /* iterator for DateSales node */ printf("Date,TotalUnitSales\n"); int r; for( r = es_mbi_beginfetch( i_datesales, n_datesales ); r == ES_ERROR_NONE; r = es_mbi_incfetch( i_datesales ) ) { printf( "%02d/%02d/%04d,%d\n", es_date_getmonth( f_dateid ), es_date_getday( f_dateid ), es_date_getyear( f_dateid ), f_totalunitsales ); } /* clean-up */ es_mbm_delete( m_dateid ); es_mbb_delete( b_datesales ); es_mbi_delete( i_datesales ); } int main( int argc, char **argv ) { /* initalize date calculator */ es_date_init( 2007, 2010 ); /* construct database definition */ struct es_mbd *def = genschema(); /* construct empty database from definition */ struct es_mb *db; check( es_mb_new( def, &db ) ); /* populate Product root node and string dictionary of product names */ struct es_sd *s_product = es_sd_new( SDCHUNKSIZE, ES_SD_STORESTRINGS ); genproduct( db, s_product ); /* populate DateSales root and child nodes */ gensales( db ); unsigned lowdate,highdate; /* run query to determine the 5 most profitable products for sales to a particular customer and range of dates (in this case the whole of 2008) */ lowdate = es_date_getid_i( 2008, 1, 1 ); highdate = es_date_getid_i( 2008, 12, 31 ); queryprofit( db, s_product, lowdate, highdate, 20 ); queryprofitq( db, s_product, lowdate, highdate, 20 ); /* print total unit sales for dates in some range (in this case April 2007) */ lowdate = es_date_getid_i( 2007, 4, 1 ); highdate = es_date_getid_i( 2007, 4, 30 ); queryunitsales( db, lowdate, highdate ); /* delete database and definition */ es_sd_delete( s_product ); es_mb_delete( db ); es_mbd_delete( def ); es_date_destroy(); return 0; }