# Copyright (C) 2008-2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
# USA

#
# This grammar is an example on how to create grammars that operate against tables
# which have different structure and it only makes sense to join them in certain
# ways and on certain fields. The following principles apply:
#
# * The first table is always the ontime table, with alias a1. This provies a fixed reference
#	point for subsequent joins
#
# * The SELECT list is either SELECT * , SELECT COUNT(*) , or may use the fact that the fields
#	in the a1 table are always known
#
# * There is always a join, therefore there is always an a2 table, however it may be one of
#	several options
#
# * Each potential join is listed separately with a specific join condition that should be realistic
# 
# * In the WHERE clause, if we generate a condition for which the table is not present, the condition is
#	commented out in order to avoid semantic errors
#

query_init:
	USE flightstats;

# The queries from this grammar may produce resultsets of varying length
# to avoid excessive memory usage, we reduce all queries to a COUNT(*)
# This way, we also do not have to tinker with any field names in the SELECT list

query:
	{ $alias_count = 0 ; %tables = () ; %aliases = () ; return undef ; }
	select ;

select:
	SELECT aggregate_item FROM join_list WHERE where |
	SELECT a1 . * FROM join_list WHERE where ORDER BY a1 . `id` LIMIT _digit ;

aggregate_item:
	COUNT(*) | MIN(a1 . dep_time) | SUM( distinct a1 . distance) | MAX(a1 . id) | COUNT( distinct a1 . tail_num ) ;

distinct:
	| DISTINCT ;

#
# We divide the joins into "big", those containing the `ontime` table, and 
# small, those containing stuff like states and ZIP codes.
#

join_list:
#	big_join_item |
	( big_join_item ) third_join_item |
	( big_join_item ) CROSS JOIN small_join_item ;

big_join_item:
	ontime2carrier | ontime2airport | ontime2aircraft ;
	
small_join_item:
	airport2state | airport2zipcode | aircraft2engine | airport2remark;

third_join_item:
	first2carrier | first2airport | first2aircraft ;

#
# Here we define only joins that are meaningful, useful and very likely to use indexes.
#

ontime2carrier:
	ontime_table INNER JOIN carrier_table ON ( previous_table . `carrier` = current_table . `code` );

first2carrier:
	INNER JOIN carrier_table ON ( a1 . `carrier` = current_table . `code` );

ontime2airport:
	ontime_table INNER JOIN airport_table ON ( previous_table . origin_destination = current_table .`code` ) ;

first2airport:
	INNER JOIN airport_table ON ( a1 . origin_destination = current_table .`code` );
	
origin_destination:
	`origin` | `destination` ;

ontime2aircraft:
	ontime_table INNER JOIN aircraft_table ON ( current_table .`tail_num` = previous_table .`tail_num` ) ;

first2aircraft:
	INNER JOIN aircraft_table ON ( a1 .`tail_num` = current_table .`tail_num` );

airport2state:
	airport_table INNER JOIN state_table ON ( previous_table . `state` = current_table . `state_code` );

airport2zipcode:
	airport_table INNER JOIN zipcode_table ON ( previous_table . `state` = current_table . `state_code` );

airport2remark:
	airport_table INNER JOIN remark_table USING ( `site_number` ) ;

aircraft2engine:
	aircraft_table INNER JOIN engine_table USING ( `aircraft_engine_code` ) ;

ontime_table:
	`ontime_mysiam` { $table_name = 'ontime'; return undef; } new_table;

carrier_table:
	`carriers` { $table_name = 'carriers'; return undef; } new_table ;

airport_table:
	`airports` { $table_name = 'airports'; return undef; } new_table ;

remark_table:
	`airport_remarks` { $table_name = 'airport_remarks' ; return undef; } new_table ;

aircraft_table:
	`aircraft` { $table_name = 'aircraft'; return undef; } new_table;

engine_table:
	`aircraft_engines` { $table_name = 'aircraft_engines' ; return undef ; } new_table;

state_table:
	`states` { $table_name = 'states' ; return undef; } new_table ;

zipcode_table:
	`zipcodes` { $table_name = 'zipcodes' ; return undef; } new_table ;

#
# We always have a WHERE and it contains a lot of expressions combined with an AND in order to provide
# numerous opportunities for optimization and reduce calculation times.
# In addition, we always define at least one condition against the `ontime` table.
#

where:
	{ $condition_table = 'ontime' ; return undef; } start_condition ontime_condition end_condition AND
	( where_list );

where_list:
	where_condition AND where_condition AND where_condition AND where_condition AND where_condition AND where_condition ;

#
# Each of the conditions described below are valid and meaningful for the particular table in question
# They are likely to use indexes and/or zero down on a smaller number of records
# 

where_condition:
	{ $condition_table = 'ontime' ; return undef; } start_condition ontime_condition end_condition |
	{ $condition_table = 'carriers' ; return undef; } start_condition carrier_condition end_condition |
	{ $condition_table = 'aircraft'; return undef; } start_condition aircraft_condition end_condition |
	{ $condition_table = 'airports'; return undef; } start_condition airport_condition end_condition |
	{ $condition_table = 'states'; return undef; } start_condition state_condition end_condition |
	{ $condition_table = 'zipcodes'; return undef; } start_condition zipcode_condition end_condition |
	{ $condition_table = 'airport_remarks'; return undef; } start_condition remark_condition end_condition |
	{ $condition_table = 'aircraft_engines'; return undef; } start_condition engine_condition end_condition ;

ontime_condition:
	table_alias . `carrier` generic_carrier_expression |
	table_alias . `origin` generic_code_expression |
	table_alias . `destination` generic_code_expression |
	table_alias . `origin` generic_code_expression AND table_alias . `destination` generic_code_expression |
	table_alias . `origin` generic_code_expression OR table_alias . `destination` generic_code_expression |
	table_alias . `tail_num` generic_char_expression ;

state_condition:
	table_alias . `state_code` generic_state_expression |
	table_alias . `name` generic_char_expression ;

zipcode_condition:
	table_alias . `zipcode` BETWEEN 10000 + ( _tinyint_unsigned * 100) AND 10000 + ( _tinyint_unsigned * 100) ;

table_alias:
	{ my $alias = shift @{$aliases{$condition_table}}; push @{$aliases{$condition_table}} , $alias ; return $alias } ;

carrier_condition:
	table_alias . `code` generic_carrier_expression;

generic_carrier_expression:
	= single_carrier |
	IN ( carrier_list ) ;

airport_condition:
	table_alias . `code` generic_code_expression |
	table_alias . `state` generic_state_expression |
	( table_alias . `state` generic_state_expression ) AND ( table_alias . `city` generic_char_expression) |
	table_alias . `longitude` BETWEEN _tinyint AND _tinyint_unsigned ;

aircraft_condition:
	table_alias . `tail_num` generic_char_expression |
	table_alias . `state` generic_state_expression ;

engine_condition:
	table_alias . `manufacturer` generic_char_expression ;

remark_condition:
	table_alias . `airport_remark_id` BETWEEN _tinyint_unsigned AND _smallint_unsigned ;

generic_char_expression:
	BETWEEN _char[invariant] AND CHAR(ASCII( _char[invariant] ) + one_two ) |
	LIKE 'N10%' |	# 6098 aircraft with tail num starting with N10 
	LIKE 'N9Q%' ;	# 10 aircraft starting with N9Q

one_two:
	1 | 2 ;

generic_code_expression:
#	BETWEEN _char[invariant] AND CHAR(ASCII( _char[invariant] ) + one_two ) |
	= single_airport |
	IN ( airport_list ) ;

single_airport:
	'ORD' |		# busiest airport
	'AKN' |		# un-busiest airport
	'BIS' |		# 100 flights 
	'LIT' |		# 1000 flights
	'MSP' ; 	# 10000 flights

airport_list:
	single_airport |
	single_airport , airport_list ;

generic_state_expression:
	= single_state |
	IN ( state_list ) |
	BETWEEN _char(2) AND _char(2) ;

state_list:
	single_state |
	single_state , state_list ;

carrier_list:
	single_carrier |
	single_carrier , carrier_list ;

single_state:
	'AK' | 'AL' | 'AR' | 'AS' | 'AZ' | 'CA' | 'CO' | 'CQ' | 'CT' | 'DC' | 'DE' | 'FL' | 'GA' | 'GU' | 'HI' | 'IA' | 'ID' | 'IL' | 'IN' | 'KS' | 'KY' | 'LA' | 'MA' | 'MD' | 'ME' | 'MI' | 'MN' | 'MO' | 'MQ' | 'MS' | 'MT' | 'NC' | 'ND' | 'NE' | 'NH' | 'NJ' | 'NM' | 'NV' | 'NY' | 'OH' | 'OK' | 'OR' | 'PA' | 'PR' | 'RI' | 'SC' | 'SD' | 'TN' | 'TX' | 'UT' | 'VA' | 'VI' | 'VT' | 'WA' | 'WI' | 'WQ' | 'WV' | 'WY' ;

single_carrier:
	'AA'|'AQ'|'AS'|'B6'|'CO'|'DH'|'DL'|'EV'|'FL'|'HA'|'HP'|'MQ'|'NW'|'OH'|'OO'|'RU'|'TW'|'TZ'|'UA'|'US'|'WN';

#
# When we define a condition, we check if the table for which this condition would apply is present in 
# the list of the tables we selected for joining. If the table is not present, the condition is still
# generated, but it is commented out in order to avoid "unknown table" errors.
#

start_condition:
	{ ((exists $tables{$condition_table}) ? '' : '/* ') } ;

end_condition:
	{ ((exists $tables{$condition_table}) ? '' : '*/ 1 = 1 ') };

new_table:
	AS { $alias_count++ ; $tables{$table_name}++ ; push @{$aliases{$table_name}}, 'a'.$alias_count ; return 'a'.$alias_count }  ;

current_table:
	{ 'a'.$alias_count };

previous_table:
	{ 'a'.($alias_count - 1) };