271 lines
9.5 KiB
Plaintext
271 lines
9.5 KiB
Plaintext
# Copyright (C) 2008-2009 Sun Microsystems, Inc. All rights reserved.
|
|
# Use is subject to license terms.
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; version 2 of the License.
|
|
#
|
|
# This program is distributed in the hope that it will be useful, but
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
# General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
|
|
# USA
|
|
|
|
#
|
|
# This grammar is an example on how to create grammars that operate against tables
|
|
# which have different structure and it only makes sense to join them in certain
|
|
# ways and on certain fields. The following principles apply:
|
|
#
|
|
# * The first table is always the ontime table, with alias a1. This provies a fixed reference
|
|
# point for subsequent joins
|
|
#
|
|
# * The SELECT list is either SELECT * , SELECT COUNT(*) , or may use the fact that the fields
|
|
# in the a1 table are always known
|
|
#
|
|
# * There is always a join, therefore there is always an a2 table, however it may be one of
|
|
# several options
|
|
#
|
|
# * Each potential join is listed separately with a specific join condition that should be realistic
|
|
#
|
|
# * In the WHERE clause, if we generate a condition for which the table is not present, the condition is
|
|
# commented out in order to avoid semantic errors
|
|
#
|
|
|
|
query_init:
|
|
USE flightstats;
|
|
|
|
# The queries from this grammar may produce resultsets of varying length
|
|
# to avoid excessive memory usage, we reduce all queries to a COUNT(*)
|
|
# This way, we also do not have to tinker with any field names in the SELECT list
|
|
|
|
query:
|
|
{ $alias_count = 0 ; %tables = () ; %aliases = () ; return undef ; }
|
|
select ;
|
|
|
|
select:
|
|
SELECT aggregate_item FROM join_list WHERE where |
|
|
SELECT a1 . * FROM join_list WHERE where ORDER BY a1 . `id` LIMIT _digit ;
|
|
|
|
aggregate_item:
|
|
COUNT(*) | MIN(a1 . dep_time) | SUM( distinct a1 . distance) | MAX(a1 . id) | COUNT( distinct a1 . tail_num ) ;
|
|
|
|
distinct:
|
|
| DISTINCT ;
|
|
|
|
#
|
|
# We divide the joins into "big", those containing the `ontime` table, and
|
|
# small, those containing stuff like states and ZIP codes.
|
|
#
|
|
|
|
join_list:
|
|
# big_join_item |
|
|
( big_join_item ) third_join_item |
|
|
( big_join_item ) CROSS JOIN small_join_item ;
|
|
|
|
big_join_item:
|
|
ontime2carrier | ontime2airport | ontime2aircraft ;
|
|
|
|
small_join_item:
|
|
airport2state | airport2zipcode | aircraft2engine | airport2remark;
|
|
|
|
third_join_item:
|
|
first2carrier | first2airport | first2aircraft ;
|
|
|
|
#
|
|
# Here we define only joins that are meaningful, useful and very likely to use indexes.
|
|
#
|
|
|
|
ontime2carrier:
|
|
ontime_table INNER JOIN carrier_table ON ( previous_table . `carrier` = current_table . `code` );
|
|
|
|
first2carrier:
|
|
INNER JOIN carrier_table ON ( a1 . `carrier` = current_table . `code` );
|
|
|
|
ontime2airport:
|
|
ontime_table INNER JOIN airport_table ON ( previous_table . origin_destination = current_table .`code` ) ;
|
|
|
|
first2airport:
|
|
INNER JOIN airport_table ON ( a1 . origin_destination = current_table .`code` );
|
|
|
|
origin_destination:
|
|
`origin` | `destination` ;
|
|
|
|
ontime2aircraft:
|
|
ontime_table INNER JOIN aircraft_table ON ( current_table .`tail_num` = previous_table .`tail_num` ) ;
|
|
|
|
first2aircraft:
|
|
INNER JOIN aircraft_table ON ( a1 .`tail_num` = current_table .`tail_num` );
|
|
|
|
airport2state:
|
|
airport_table INNER JOIN state_table ON ( previous_table . `state` = current_table . `state_code` );
|
|
|
|
airport2zipcode:
|
|
airport_table INNER JOIN zipcode_table ON ( previous_table . `state` = current_table . `state_code` );
|
|
|
|
airport2remark:
|
|
airport_table INNER JOIN remark_table USING ( `site_number` ) ;
|
|
|
|
aircraft2engine:
|
|
aircraft_table INNER JOIN engine_table USING ( `aircraft_engine_code` ) ;
|
|
|
|
ontime_table:
|
|
`ontime_mysiam` { $table_name = 'ontime'; return undef; } new_table;
|
|
|
|
carrier_table:
|
|
`carriers` { $table_name = 'carriers'; return undef; } new_table ;
|
|
|
|
airport_table:
|
|
`airports` { $table_name = 'airports'; return undef; } new_table ;
|
|
|
|
remark_table:
|
|
`airport_remarks` { $table_name = 'airport_remarks' ; return undef; } new_table ;
|
|
|
|
aircraft_table:
|
|
`aircraft` { $table_name = 'aircraft'; return undef; } new_table;
|
|
|
|
engine_table:
|
|
`aircraft_engines` { $table_name = 'aircraft_engines' ; return undef ; } new_table;
|
|
|
|
state_table:
|
|
`states` { $table_name = 'states' ; return undef; } new_table ;
|
|
|
|
zipcode_table:
|
|
`zipcodes` { $table_name = 'zipcodes' ; return undef; } new_table ;
|
|
|
|
#
|
|
# We always have a WHERE and it contains a lot of expressions combined with an AND in order to provide
|
|
# numerous opportunities for optimization and reduce calculation times.
|
|
# In addition, we always define at least one condition against the `ontime` table.
|
|
#
|
|
|
|
where:
|
|
{ $condition_table = 'ontime' ; return undef; } start_condition ontime_condition end_condition AND
|
|
( where_list );
|
|
|
|
where_list:
|
|
where_condition AND where_condition AND where_condition AND where_condition AND where_condition AND where_condition ;
|
|
|
|
#
|
|
# Each of the conditions described below are valid and meaningful for the particular table in question
|
|
# They are likely to use indexes and/or zero down on a smaller number of records
|
|
#
|
|
|
|
where_condition:
|
|
{ $condition_table = 'ontime' ; return undef; } start_condition ontime_condition end_condition |
|
|
{ $condition_table = 'carriers' ; return undef; } start_condition carrier_condition end_condition |
|
|
{ $condition_table = 'aircraft'; return undef; } start_condition aircraft_condition end_condition |
|
|
{ $condition_table = 'airports'; return undef; } start_condition airport_condition end_condition |
|
|
{ $condition_table = 'states'; return undef; } start_condition state_condition end_condition |
|
|
{ $condition_table = 'zipcodes'; return undef; } start_condition zipcode_condition end_condition |
|
|
{ $condition_table = 'airport_remarks'; return undef; } start_condition remark_condition end_condition |
|
|
{ $condition_table = 'aircraft_engines'; return undef; } start_condition engine_condition end_condition ;
|
|
|
|
ontime_condition:
|
|
table_alias . `carrier` generic_carrier_expression |
|
|
table_alias . `origin` generic_code_expression |
|
|
table_alias . `destination` generic_code_expression |
|
|
table_alias . `origin` generic_code_expression AND table_alias . `destination` generic_code_expression |
|
|
table_alias . `origin` generic_code_expression OR table_alias . `destination` generic_code_expression |
|
|
table_alias . `tail_num` generic_char_expression ;
|
|
|
|
state_condition:
|
|
table_alias . `state_code` generic_state_expression |
|
|
table_alias . `name` generic_char_expression ;
|
|
|
|
zipcode_condition:
|
|
table_alias . `zipcode` BETWEEN 10000 + ( _tinyint_unsigned * 100) AND 10000 + ( _tinyint_unsigned * 100) ;
|
|
|
|
table_alias:
|
|
{ my $alias = shift @{$aliases{$condition_table}}; push @{$aliases{$condition_table}} , $alias ; return $alias } ;
|
|
|
|
carrier_condition:
|
|
table_alias . `code` generic_carrier_expression;
|
|
|
|
generic_carrier_expression:
|
|
= single_carrier |
|
|
IN ( carrier_list ) ;
|
|
|
|
airport_condition:
|
|
table_alias . `code` generic_code_expression |
|
|
table_alias . `state` generic_state_expression |
|
|
( table_alias . `state` generic_state_expression ) AND ( table_alias . `city` generic_char_expression) |
|
|
table_alias . `longitude` BETWEEN _tinyint AND _tinyint_unsigned ;
|
|
|
|
aircraft_condition:
|
|
table_alias . `tail_num` generic_char_expression |
|
|
table_alias . `state` generic_state_expression ;
|
|
|
|
engine_condition:
|
|
table_alias . `manufacturer` generic_char_expression ;
|
|
|
|
remark_condition:
|
|
table_alias . `airport_remark_id` BETWEEN _tinyint_unsigned AND _smallint_unsigned ;
|
|
|
|
generic_char_expression:
|
|
BETWEEN _char[invariant] AND CHAR(ASCII( _char[invariant] ) + one_two ) |
|
|
LIKE 'N10%' | # 6098 aircraft with tail num starting with N10
|
|
LIKE 'N9Q%' ; # 10 aircraft starting with N9Q
|
|
|
|
one_two:
|
|
1 | 2 ;
|
|
|
|
generic_code_expression:
|
|
# BETWEEN _char[invariant] AND CHAR(ASCII( _char[invariant] ) + one_two ) |
|
|
= single_airport |
|
|
IN ( airport_list ) ;
|
|
|
|
single_airport:
|
|
'ORD' | # busiest airport
|
|
'AKN' | # un-busiest airport
|
|
'BIS' | # 100 flights
|
|
'LIT' | # 1000 flights
|
|
'MSP' ; # 10000 flights
|
|
|
|
airport_list:
|
|
single_airport |
|
|
single_airport , airport_list ;
|
|
|
|
generic_state_expression:
|
|
= single_state |
|
|
IN ( state_list ) |
|
|
BETWEEN _char(2) AND _char(2) ;
|
|
|
|
state_list:
|
|
single_state |
|
|
single_state , state_list ;
|
|
|
|
carrier_list:
|
|
single_carrier |
|
|
single_carrier , carrier_list ;
|
|
|
|
single_state:
|
|
'AK' | 'AL' | 'AR' | 'AS' | 'AZ' | 'CA' | 'CO' | 'CQ' | 'CT' | 'DC' | 'DE' | 'FL' | 'GA' | 'GU' | 'HI' | 'IA' | 'ID' | 'IL' | 'IN' | 'KS' | 'KY' | 'LA' | 'MA' | 'MD' | 'ME' | 'MI' | 'MN' | 'MO' | 'MQ' | 'MS' | 'MT' | 'NC' | 'ND' | 'NE' | 'NH' | 'NJ' | 'NM' | 'NV' | 'NY' | 'OH' | 'OK' | 'OR' | 'PA' | 'PR' | 'RI' | 'SC' | 'SD' | 'TN' | 'TX' | 'UT' | 'VA' | 'VI' | 'VT' | 'WA' | 'WI' | 'WQ' | 'WV' | 'WY' ;
|
|
|
|
single_carrier:
|
|
'AA'|'AQ'|'AS'|'B6'|'CO'|'DH'|'DL'|'EV'|'FL'|'HA'|'HP'|'MQ'|'NW'|'OH'|'OO'|'RU'|'TW'|'TZ'|'UA'|'US'|'WN';
|
|
|
|
#
|
|
# When we define a condition, we check if the table for which this condition would apply is present in
|
|
# the list of the tables we selected for joining. If the table is not present, the condition is still
|
|
# generated, but it is commented out in order to avoid "unknown table" errors.
|
|
#
|
|
|
|
start_condition:
|
|
{ ((exists $tables{$condition_table}) ? '' : '/* ') } ;
|
|
|
|
end_condition:
|
|
{ ((exists $tables{$condition_table}) ? '' : '*/ 1 = 1 ') };
|
|
|
|
new_table:
|
|
AS { $alias_count++ ; $tables{$table_name}++ ; push @{$aliases{$table_name}}, 'a'.$alias_count ; return 'a'.$alias_count } ;
|
|
|
|
current_table:
|
|
{ 'a'.$alias_count };
|
|
|
|
previous_table:
|
|
{ 'a'.($alias_count - 1) };
|