.\"	@(#)bmac.std	2.4	10/15/84;
.\" standard format troff commands
.\" citation formatting strings
.ds [[ [
.ds ]] ]
.ds ], ,\|
.ds ]- -
.ds [. " \&
.ds .] .
.ds [, " \&
.ds ,] ,
.ds [? " \&
.ds ?] ?
.ds [: " \&
.ds :] :
.ds [; " \&
.ds ;] ;
.ds [! " \&
.ds !] !
.ds [" " \&
.ds "] \&"
.ds [' " \&
.ds '] '
.ds [< " \&
.ds >]
.\" reference formmating strings
.ds a] " \&
.ds b] , \&
.ds c] , \&
.ds n] "\& and \&
.ds m] "\& and \&
.ds p] .
.\" reference formmating macros
.de s[   \" start reference
.nh
.IP [\\*([F] 5m
..
.de e[   \" end reference
.[-
..
.de []   \" start to display collected references
.LP
..
.de ][   \" choose format
.ie !"\\*([J"" \{\
.    ie !"\\*([V"" .nr t[ 1    \" journal
.    el            .nr t[ 5    \" conference paper
.\}
.el .ie !"\\*([B"" .nr t[ 3    \" article in book
.el .ie !"\\*([R"" .nr t[ 4    \" technical report
.el .ie !"\\*([I"" .nr t[ 2    \" book
.el                .nr t[ 0    \" other
.\\n(t[[
..
.de 0[   \" other
.s[
.if !"\\*([A"" \\*([A\\c
.if !"\\*([T"" , \\*([T\\c
.if !"\\*([V"" , Vol. \\*([V\\c
.if !"\\*([O"" , \\*([O\\c
.if !"\\*([D"" , \\*([D\\c
\&.
.e[
..
.de 1[ \" journal article
.s[
.if !"\\*([A"" \\*([A,
.if !"\\*([T""  \\*(lq\\*([T\\*(rq,
\\fI\\*([J \\*([V\\fP\c
.ie !"\\*([N"" , \\*([N
.el  
.if !"\\*([D"" (\\*([D)\c
.if !"\\*([P"" , \\*([P\c
.if !"\\*([I"" , \\*([I\c
\\&.
.if !"\\*([O"" \\*([O.
.e[
..
.de 2[ \" book
.s[
.ie !"\\*([A"" \\*([A,
.el .if !"\\*([E"" \{\
.       ie \\n([E-1 \\*([E, eds.,
.       el \\*([E, ed.,\}
.if !"\\*([T"" \\fI\\*([T\\fP,
.rm a[
.if !"\\*([I"" .ds a[ \\*([I
.if !"\\*([C"" \{\
.       if !"\\*(a["" .as a[ , \\&
.       as a[ \\*([C\}
.if !"\\*([D"" \{\
.       if !"\\*(a["" .as a[ , \\&
.       as a[ \\*([D\}
\\*(a[.
.if !"\\*([G"" Gov. ordering no. \\*([G.
.if !"\\*([O"" \\*([O.
.e[
..
.de 3[ \" article in book
.s[
.if !"\\*([A"" \\*([A,
.if !"\\*([T"" \\*(lq\\*([T\\*(rq,
in \\fI\\*([B\\fP\c
.if !"\\*([V"" , vol. \\*([V
.if !~\\*([E~~ \{\
.       ie , \\n([E-1  \\*([E (editors)\c
.       el , \\*([E (editor)\c\}
.if !"\\*([I"" , \\*([I\c
.if !"\\*([C"" , \\*([C\c
.if !"\\*([D"" , \\*([D\c
.if !"\\*([P"" , \\*([P\c
\\&.
.if !"\\*([O"" \\*([O.
.e[
..
.de 4[ \" report
.s[
.if !"\\*([A"" \\*([A,
.if !~\\*([E~~ \{\
.       ie \\n([E-1 \\*([E, editors.
.       el \\*([E, editor.\}
\&\\*(lq\\*([T\\*(rq,
\\*([R\c
.if !"\\*([G"" \& (\\*([G)\c
.if !"\\*([I"" , \\*([I\c
.if !"\\*([C"" , \\*([C\c
.if !"\\*([D"" , \\*([D\c
\\&.
.if !"\\*([O"" \\*([O.
.e[
..
.de 5[ \" conference paper
.s[
.if !"\\*([A"" \\*([A,
.if !"\\*([T"" \\*(lq\\*([T\\*(rq,
\\fI\\*([J\\fP,
.if !"\\*([C"" \\*([C,
.if !"\\*([D"" \\*([D\c
.if !"\\*([P"" , \\*([P\c
\\&.
.if !"\\*([O"" \\*([O.
.e[
..
.de [-   \" clean up after yourself
.rm [A [B [C [D
.rm [E [F [G
.rm [I [J [K
.rm [N [O [P
.rm [R [T
.rm [V [W
..
.de s[   \" start reference
.nh
.IP [\\*([F] 10n
..
.ll 6.5i
.nr tp 10
.nr fp 12
.nr sp 12
.nr pp 12
.nr bi 2n
.de IP
.ip \\$1 \\$2
..
.de LP
.lp
..
.de BE
.(b
.sz 9
.ta .2i +.2i +.2i +.2i +.2i +.2i +.2i +.2i +.2i +.2i +.2i +.2i
..
.de EE
.re
.sz \n(pp
.)b
..
.sz 12
.sz \n(pp
.fo ''%''
.(l C
.sz \n(sp
.b
The POSTGRES Data Model\u\(dg\d
.sz \n(pp
.\".sp
.\".r 
.\"(Draft printed: \*(td)
.sp
.i
Lawrence A. Rowe 
Michael R. Stonebraker
.r
.sp
Computer Science Division, EECS Department
University of California
Berkeley, CA 94720
.)l
.\"	*** commands to set 1.5 spacing between lines ***
.vs 16
.nr $r \n(.vu/\n(.su
.nr $R \n($r
.\"   	*** commands to set-up for conference paper ***
.\".bp
.\" set-up for conference paper printing
.\" set 1.5 lines spacing.
.\" set margins for conf paper
.\".nr L 4.0i
.\".ll \nLu
.\"   	*** end commands to set-up for conference paper ***
.sp
.\".2c
.(f
\(dg This research was supported by the National Science Foundation
under Grant DCR-8507256 and 
the Defense Advanced Research Projects Agency (DoD), Arpa
Order No. 4871, monitored by Space and Naval Warfare Systems Command 
under Contract N00039-84-C-0089.
.)f
.sp 2
.(l C
.sz \n(sp
.b Abstract
.)l
.pp
The design of the POSTGRES data model is described.
The data model is a relational model that has been extended with
abstract data types including user-defined operators and procedures,
relation attributes of type procedure, and attribute
and procedure inheritance.
These mechanism can be used to simulate a wide variety of 
semantic and object-oriented data modeling constructs
including aggregation and generalization, complex objects
with shared subobjects, and attributes that reference tuples
in other relations.
.sh 1 "Introduction"
.pp
This paper describes the data model for POSTGRES, a next-generation
extensible database management system being developed at
the University of California 
\*([[StR86\*(]].
The data model is based on the idea of extending the relational
model developed by Codd\*([<\*([[Cod70\*(]]\*(>] with general
mechanisms that can be used to simulate a variety of semantic
data modeling constructs.
The mechanisms include: 1) abstract data types (ADT's), 2) data of
type procedure, and 3) rules.
These mechanisms can be used to support complex objects or
to implement a shared object hierarchy for an object-oriented 
programming language\*([<\*([[Row86\*(]]\*(>].
Most of these ideas have appeared elsewhere
\*([[Ste84\*(],Sto85\*(],Sto86a\*(],Sto86b\*(]].
.pp
We have discovered that some semantic constructs that were
not directly supported can be easily added to the system.
Consequently, we have made several changes to the data model and the
syntax of the query language that are documented here.
These changes include providing support for primary keys,
inheritance of data and procedures,
and attributes that reference tuples in other relations.
.pp
The major contribution of this paper is to show that 
inheritance can be added to a relational data model with only 
a modest number of changes to the model and the implementation of the system.
The conclusion that we draw from this result is that the major concepts
provided in an object-oriented data model (e.g., structured attribute types,
inheritance, union type attributes, and support for shared subobjects) 
can be cleanly and efficiently supported 
in an extensible relational database management system.
The features used to support these mechanisms are abstract data types
and attributes of type procedure.
.pp
The remainder of the paper describes the POSTGRES data model and 
is organized as follows.
Section 2 presents the data model.
Section 3 describes the attribute type system.
Section 4 describes how the query language can be extended
with user-defined procedures.
Section 5 compares the model with other data models and
section 6 summarizes the paper.
.sh 1 "Data Model"
.pp
A database is composed of a collection of \fIrelations\fP
that contain tuples which represent real-world entities
(e.g., documents and people) or relationships (e.g., authorship).
A relation has attributes of fixed types 
that represent properties of the entities and relationships
(e.g., the title of a document) and a primary key.
Attribute types can be atomic (e.g., integer, floating point,
or boolean) or structured (e.g., array or procedure).
The primary key is a sequence of attributes of the relation,
when taken together, uniquely identify each tuple.
.pp
A simple university database will be used to illustrate the model.
The following command defines a relation that represents people:
.BE
\fBcreate\fP PERSON ( Name = char[25], 
	Birthdate = date, Height = int4,
	Weight = int4, StreetAddress = char[25], 
	City = char[25], State = char[2])
.EE
This command defines a relation and creates a structure for storing the tuples.
.pp
The definition of a relation may optionally specify a 
primary key and other relations from which to inherit attributes.
A primary key is a combination of attributes that uniquely identify each tuple.
The key is specified with a \fBkey\fP-clause as follows:
.BE
\fBcreate\fP PERSON ( \.\ \.\ \.)
\fBkey\fP (Name)
.EE
Tuples must have a value for all key attributes.
The specification of a key may optionally include the name of an
operator that is to be used when comparing two tuples.
For example, suppose a relation had a key whose type
was a user-defined ADT.
If an attribute of type \fIbox\fP was part of the primary key,
the comparison operator must be specified 
since different \fIbox\fP operators could be used
to distinguish the entries (e.g., area equals or box equality).  
The following example shows the definition of a relation with a 
key attribute of type \fIbox\fP that
uses the area equals operator (\fIAE\fP) to determine key value equality:
.BE
\fBcreate\fP PICTURE(Title = char[25], Item = box)
\fBkey\fP (Item \fBusing\fP AE)
.EE
.pp
Data inheritance is specified with an \fBinherits\fP-clause.
Suppose, for example, that people in the university database
are employees and/or students and that different 
attributes are to be defined for each category.
The relation for each category includes the \fIPERSON\fP
attributes and the attributes that are specific to the 
category.
These relations can be defined by replicating the \fIPERSON\fP
attributes in each relation definition or by inheriting them
for the definition of \fIPERSON\fP.
Figure 1 shows the relations and an inheritance hierarchy that
could be used to share the definition of the attributes.
.(z
.hl
.sp 2i
.sp
.ce
Figure 1: Relation hierarchy.
.hl
.)z
The commands that define the relations other than the
\fIPERSON\fP relation defined above are:
.BE
\fBcreate\fP EMPLOYEE (Dept = char[25],
	Status = int2, Mgr = char[25],
	JobTitle = char[25], Salary = money)
\fBinherits\fP (PERSON)
.sp 0.5v
\fBcreate\fP STUDENT (Sno = char[12],
	Status = int2, Level = char[20])
\fBinherits\fP (PERSON)
.sp 0.5v
\fBcreate\fP STUDEMP (IsWorkStudy = bool)
\fBinherits\fP (STUDENT, EMPLOYEE)
.EE
.pp
A relation inherits all attributes from its parent(s) unless an attribute
is overriden in the definition.
For example, the \fIEMPLOYEE\fP relation inherits the \fIPERSON\fP
attributes \fIName\fP, \fIBirthdate\fP, \fIHeight\fP, \fIWeight\fP,
\fIStreetAddress\fP, \fICity\fP, and \fIState\fP.
Key specifications are also inherited so \fIName\fP is also the key
for EMPLOYEE.
.pp
Relations may inherit attributes from more than one parent.
For example, \fISTUDEMP\fP inherits attributes from \fISTUDENT\fP
and \fIEMPLOYEE\fP.
An inheritance conflict occurs when the same attribute name is inherited from
more than one parent (e.g., \fISTUDEMP\fP
inherits \fIStatus\fP from \fIEMPLOYEE\fP and \fISTUDENT\fP\|).
If the inherited attributes have the same type, an attribute
with the type is included in the relation that is being defined.
Otherwise, the declaration is disallowed.\**
.(f
\** Most attribute inheritance models have a conflict resolution
rule that selects one of the conflicting attributes.
We chose to disallow inheritance because we could not discover
an example where it made sense, except when the types were identical.
On the other hand, procedure inheritance (discussed below) does use
a conflict resolution rule because many examples exist in which
one procedure is prefered.
.)f
.pp
The POSTGRES query language is a generalized version of 
QUEL\*([<\*([[HSW75\*(]]\*(>], called \fIPOSTQUEL\fP.
QUEL was extended in several directions.
First, POSTQUEL has a \fBfrom\fP-clause to define tuple-variables
rather than a \fBrange\fP command.
Second, arbitrary relation-valued expressions may appear any place
that a relation name could appear in QUEL.
Third, transitive closure and \fBexecute\fP commands
have been added to the language\*([<\*([[Kue84\*(]]\*(>].
And lastly, POSTGRES maintains historical data so POSTQUEL
allows queries to be run on past
database states or on any data that was in the database at any time.
These extensions are described in the remainder of this section.
.pp
The \fBfrom\fP-clause was added to the language so that tuple-variable
definitions for a query could be easily determined at compile-time.
This capability was needed because POSTGRES will, at the user's
request, compile queries and save them in the system catalogs.
The \fBfrom\fP-clause is illustrated in
the following query that lists all work-study students who
are sophomores:
.BE
\fBretrieve\fP (SE.name)
\fBfrom\fP SE \fBin\fP STUDEMP
\fBwhere\fP SE.IsWorkStudy
\ \ \ \ \fBand\fP SE.Status = ``sophomore''
.EE
The \fBfrom\fP-clause specifies the set of tuples over which a
tuple-variable will range.
In this example, the tuple-variable \fISE\fP ranges over the set
of student employees.
.pp
A default tuple-variable with the same name is defined for each relation
referenced in the target-list or \fBwhere\fP-clause of a query.
For example, the query above could have been written:
.BE
\fBretrieve\fP (STUDEMP.name)
\fBwhere\fP STUDEMP.IsWorkStudy
\ \ \ \ \fBand\fP STUDEMP.Status = ``sophomore''
.EE
Notice that the attribute \fIIsWorkStudy\fP is a boolean-valued attribute
so it does not require an explicit value test (e.g., 
\fISTUDEMP.IsWorkStudy = ``true''\fP\|).
.pp
The set of tuples that a tuple-variable may range over can be a 
named relation or a relation-expression.
For example, suppose the user wanted to retrieve all students in the database
who live in Berkeley regardless of
whether they are students or student employees.
This query can be written as follows:
.BE
\fBretrieve\fP (S.name)
\fBfrom\fP S \fBin\fP STUDENT*
\fBwhere\fP S.city = ``Berkeley''
.EE
The ``*'' operator specifies the relation formed by taking
the union of the named relation (i.e., \fISTUDENT\fP) and
all relations that inherit attributes from it (i.e., \fISTUDEMP\fP).
If the ``*'' operator was not used, the query retrieves only tuples
in the student relation (i.e., students who are not student employees).
In most data models that support inheritance the relation name defaults
to the union of relations over the inheritance hierarchy (i.e., the
data described by \fISTUDENT*\fP above).
We chose a different default because queries that involve unions will
be slower than queries on a single relation.
By forcing the user to request the union explicitly with the ``*''
operator, he will be aware of this cost.
.pp
Relation expressions may include other set operators:
.EQ
delim $$
.EN
union ($union$), intersection ($inter$), and difference (\-).
For example, the following query retrieves the names of people
who are students or employees but not student employees:
.BE
\fBretrieve\fP (S.name)
\fBfrom\fP S \fBin\fP (STUDENT $union$ EMPLOYEE)
.EE
Suppose a tuple does not have an attribute referenced elsewhere
in the query.
If the reference is in the target-list, the return tuple will not 
contain the attribute.\**
.(f
\** The application program interface to POSTGRES allows the stream
of tuples passed back to the program to have dynamically varying
columns and types.
.)f
If the reference is in the qualification, the clause containing
the qualification is ``false''.
.pp
POSTQUEL also provides set comparison operators and a relation-constructor
that can be used to specify some difficult queries more easily than in a 
conventional query language.
.EQ
delim off
.EN
For example, suppose that students could have several majors.
The natural representation for this data is to define a separate
relation:
.BE
\fBcreate\fP MAJORS(Sname = char[25],
	Mname = char[25])
.EE
where \fISname\fP is the student's name and \fIMname\fP is the major.
With this representation, the following query retrieves the names of students
with the same majors as Smith:
.BE
\fBretrieve\fP (M1.Sname)
\fBfrom\fP M1 \fBin\fP MAJORS
\fBwhere\fP {(x.Mname) \fBfrom\fP x \fBin\fP MAJORS 
			\fBwhere\fP x.Sname = M1.Sname}
   \(sb {(x.Mname) \fBfrom\fP x \fBin\fP MAJORS
		\fBwhere\fP x.Sname=``Smith''} 
.EE
The expressions enclosed in set symbols (``{\...}'') are 
relation-constructors.
.pp
The general form of a relation-constructor\** is
.(f
\** Relation constructors are really aggregate functions.
We have designed a mechanism to support extensible aggregate functions,
but have not yet worked out the query language syntax and semantics.
.)f
.BE
{(\fItarget-list\fP\|)	\fBfrom\fP \fIfrom-clause\fP
			\fBwhere\fP \fIwhere-clause\fP}
.EE
which specifies the same relation as the query
.BE
\fBretrieve\fP (\fItarget-list\fP\|)
\fBfrom\fP \fIfrom-clause\fP
\fBwhere\fP \fIwhere-clause\fP
.EE
Note that a tuple-variable defined in the outer query (e.g., M1 in
the query above) can be used within a relation-constructor but that a
tuple-variable defined in the relation-constructor cannot be used in
the outer query.
Redefinition of a tuple-variable in a relation constructor creates a
distinct variable as in a block-structured programming language
(e.g., PASCAL).
Relation-valued expressions (including attributes of type procedure
described in the next section) can be used any place in a query that
a named relation can be used.
.pp
Database updates are specified with conventional update
commands as shown in the following examples:
.BE
/* Add a new employee to the database. */
\fBappend\fP \fBto\fP EMPLOYEE(name = \fIvalue\fP,
	age = \fIvalue\fP, ...)
.sp
/* Change state codes using 
	MAP(OldCode, NewCode). */
\fBreplace\fP P(State = MAP.NewCode)
\fBfrom\fP P \fBin\fP PERSON*
\fBwhere\fP P.State = MAP.OldCode
.sp
/* Delete students born before today. */
\fBdelete\fP STUDENT
\fBwhere\fP STUDENT.Birthdate < ``today''
.EE
Deferred update semantics are used for all updates commands.
.pp
POSTQUEL supports the transitive closure commands developed in QUEL*
\*([[Kue84\*(]].
A ``*'' command continues to execute until no
tuples are retrieved (e.g., \fBretrieve*\fP\|) or updated
(e.g., \fBappend*\fP, \fBdelete*\fP, or \fBreplace*\fP\|).
For example, the following query creates a relation that contains
all employees who work for Smith:
.BE
\fBretrieve* into\fP SUBORD(E.Name, E.Mgr)
\fBfrom\fP E \fBin\fP EMPLOYEE, S \fBin\fP SUBORD
\fBwhere\fP E.Name = ``Smith''
	\fBor\fP E.Mgr = S.Name
.EE
This command continues to execute the \fBretrieve-into\fP command
until there are no changes made to the \fISUBORD\fP relation.
.pp
Lastly, POSTGRES saves data deleted from or modified
in a relation so that queries can be executed on historical data.
For example, the following query looks for students who
lived in Berkeley on August 1, 1980:
.BE
\fBretrieve\fP (S.Name)
\fBfrom\fP S \fBin\fP STUDENT[``August 1, 1980'']
\fBwhere\fP S.City = ``Berkeley''
.EE
The date specified in the brackets following the relation name specifies
the relation at the designated time.
The date can be specified in many different formats and optionally
may include a time of day.
The query above only examines students who are not student employees.
To search the set of all students, the \fBfrom\fP-clause would be
.BE
 ...\fBfrom\fP S \fBin\fP STUDENT*[``August 1, 1980'']
.EE
.pp
Queries can also be executed on all data that is currently in the
relation or was in it at some time in the past (i.e., all data).
The following query retrieves all students who ever lived in Berkeley:
.BE
\fBretrieve\fP (S.Name)
\fBfrom\fP S \fBin\fP STUDENT[]
\fBwhere\fP S.City = ``Berkeley''
.EE
The notation ``[]'' can be appended to any relation name.
.pp
Queries can also be specified on data that was in the relation during
a given time period.
The time period is specified by
giving a start- and end-time as shown in the
following query that retrieves students who lived in Berkeley at
any time in August 1980:
.BE
\fBretrieve\fP (S.Name)
\fBfrom\fP S \fBin\fP STUDENT*[``August 1, 1980'',
								``August 31, 1980'']
\fBwhere\fP S.City = ``Berkeley''
.EE
Shorthand notations are supported for all tuples in a relation up
to some date (e.g., \fISTUDENT*[,``August 1, 1980'']\fP\|) or from
some date to the present (e.g., \fISTUDENT*[``August 1, 1980'',\|]\fP\|).
.pp
The POSTGRES default is to save all data unless the user explicitly
requests that data be purged.
Data can be purged before a specific data (e.g., before January 1, 1987)
or before some time period (e.g., before six months ago).
The user may also request that all historical data be purged so that
only the current data in the relation is stored.
.pp
POSTGRES also supports versions of relations.
A version of a relation can be created from a relation or a snapshot.
A version is created by specifying the base relation as shown
in the command
.BE
\fBcreate version\fP MYPEOPLE \fBfrom\fP PERSON
.EE
that creates a version, named \fIMYPEOPLE\fP, derived from
the \fIPERSON\fP relation.
Data can be retrieved from and updated in a version just like a relation.
Updates to the version do not modify the base relation.
However, updates to the base relation are propagated to
the version unless the value has been modified.
For example, if George's birthdate is changed in \fIMYPEOPLE\fP, 
a \fBreplace\fP command that changes
his birthdate in \fIPERSON\fP will not be propagated to \fIMYPEOPLE\fP.
.pp
If the user does not want updates to the base relation to propagate to
the version, he can create a version of a snapshot.
A snapshot is a copy of the current contents of a relation 
\*([[AdL80\*(]].
A version of a snapshot is created by the following command:
.BE
\fBcreate version\fP YOURPEOPLE
\fBfrom\fP PERSON[``now'']
.EE
The snapshot version can be updated directly by issuing update commands
on the version.
But, updates to the base relation are not propagated to the version.
.pp
A \fBmerge\fP command is provided to merge changes 
made to a version back into the base relation.
An example of this command is
.BE
\fBmerge\fP YOURPEOPLE \fBinto\fP PERSON
.EE
that will merge the changes made to \fIYOURPEOPLE\fP back into 
\fIPERSON\fP.
The \fBmerge\fP command uses a semi-automatic procedure to resolve 
updates to the underlying relation and the version that conflict 
\*([[Gae84\*(]].
.pp
This section described most of the data definition and data manipulation
commands in POSTQUEL.
The commands that were not described are the commands for defining
rules,
utility commands that only affect the performance of the system 
(e.g., \fBdefine index\fP and \fBmodify\fP),
and other miscellaneous utility commands (e.g., \fBdestroy\fP and \fBcopy\fP).
The next section describes the type system for relation attributes.
.sh 1 "Data Types"
.pp
POSTGRES provides a collection of atomic and structured types.
The predefined atomic types include:
\fIint2\fP, \fIint4\fP, \fIfloat4\fP, \fIfloat8\fP,
\fIbool\fP, \fIchar\fP, and \fIdate\fP.
The standard arithmetic and comparison operators are provided
for the numeric and date data types and the standard string and comparison
operators for character arrays.
Users can extend the system by adding new atomic types using an abstract
data type (ADT) definition facility.
.pp
All atomic data types are defined to the system as ADT's.
An ADT is defined by specifying the type name, the length of the
internal representation in bytes, procedures for converting from an external
to internal representation for a value and from an internal
to external representation, and a default value.
The command
.BE
\fBdefine type\fP int4 \fBis\fP (InternalLength = 4,
	InputProc = CharToInt4,
	OutputProc = Int4ToChar, Default = ``0'')
.EE
defines the type \fIint4\fP which is predefined in the system.
\fICharToInt4\fP and \fIInt4ToChar\fP are procedures that are coded in
a conventional programming language (e.g., C) and  defined to the system
using the commands described in section 4.
.pp
Operators on ADT's are defined by specifying the 
the number and type of operands, the return type, 
the precedence and associativity
of the operator, and the procedure that implements it.
For example, the command
.BE
\fBdefine operator\fP ``+''(int4, int4) \fBreturns\f int4
	\fBis\fP (Proc = Plus, Precedence = 5,
		Associativity = ``left'')
.EE
defines the plus operator.
Precedence is specified by a number.
Larger numbers imply higher precedence.
The predefined operators have the precedences shown in figure 2.
.(z
.hl
.TS
center box;
c | l.
\fBPrecedence	Operators\fP
=
80	\(ua
_
70	\fBnot\fP \- (unary)
_
60	* /
_
50	+ \- (binary)
_
40	< \(<= > \(>=
_
30	= \(!=
_
20	\fBand\fP
_
10	\fBor\fP
.TE
.sp
.ce
Figure 2: Predefined operators precedence.
.hl
.)z
These precedences can be changed by changing the operator definitions.
Associativity is either left or right depending on the semantics desired.
This example defined an operator denoted by a symbol (i.e., ``+'').
Operators can also be denoted by identifiers as shown below.
.pp
Another example of an ADT definition is the following command that
defines an ADT that represents boxes:
.BE
\fBdefine type\fP box \fBis\fP (InternalLength = 16,
	InputProc = CharToBox,
	OutputProc = BoxToChar, Default = ``'')
.EE
The external representation of a box is a character string
that contains two points that represent the upper-left and lower-right
corners of the box.
With this representation, the constant
.BE
``20,50:10,70''
.EE
describes a box whose upper-left corner is at (20, 50) and lower-right
corner is at (10, 70).
\fICharToBox\fP takes a character string like this one and returns a 16 byte
representation of a box (e.g., 4 bytes per x- or y-coordinate value).
\fIBoxToChar\fP is the inverse of \fICharToBox\fP
.pp
Comparison operators can be defined on ADT's that can be used in
access methods or optimized in queries.
For example, the definition
.BE
\fBdefine operator\fP AE(box, box) \fBreturns\f bool
	\fBis\fP (Proc = BoxAE, Precedence = 3,
		Associativity = ``left'', Sort = BoxArea, 
		Hashes, Restrict = AERSelect,
		Join = AEJSelect, Negator = BoxAreaNE)
.EE
defines an operator ``area equals'' on boxes.
In addition to the semantic information about the operator itself,
this specification includes information used by POSTGRES
to build indexes and to optimize queries using the operator.
For example, suppose the \fIPICTURE\fP relation was defined by
.BE
\fBcreate\fP PICTURE(Title = char[], Item = box)
.EE
and the query
.BE
\fBretrieve\fP (PICTURE.\fBall\fP)
\fBwhere\fP PICTURE.Item AE ``50,100:100,50''
.EE
was executed.
The \fISort\fP property of the \fIAE\fP operator specifies 
the procedure to be used to sort the relation if
a merge-sort join strategy was selected to implement the query.
It also specifies the procedure to use when building an ordered
index (e.g., B-Tree) on an attribute of type \fIbox\fP.
The \fIHashes\fP property indicates that this operator can be used
to build a hash index on a \fIbox\fP attribute.
Note that either type of index can be used to optimize the query above.
The \fIRestrict\fP and \fIJoin\fP properties specify the procedure that
is to be called by the query optimizer to compute the restrict and join
selectivities, respectively, of a clause involving the operator.
These selectivity properties specify procedures that will
return a floating point value between 0.0 and 1.0 that indicate the
attribute selectivity given the operator.
Lastly, the \fINegator\fP property specifies the procedure that is
to be used to compare two values when a query predicate requires the
operator to be negated as in
.BE
\fBretrieve\fP (PICTURE.\fBall\fP)
\fBwhere\fP \fBnot\fP (PICTURE.Item 
					AE ``50,100:100,50'')
.EE
The \fBdefine operator\fP command also may specify a procedure that
can be used if the query predicate includes an operator that is not
commutative.
For example, the commutator procedure for ``area less than'' (\fIALT\fP) 
is the procedure that implements ``area greater than or equal'' (\fIAGE\fP).
More details on the use of these properties is given elsewhere\*([<\*([[Sto86b\*(]]\*(>].
.pp
Type-constructors are provided to define structured types
(e.g., arrays and procedures) that can be used to represent complex data.
An \fIarray\fP type-constructor can be used to define a 
variable- or fixed-size array.
A fixed-size array is declared by specifying the element
type and upper bound of the array as illustrated by
.BE
\fBcreate\fP PERSON(Name = char[25])
.EE
which defines an array of twenty-five characters.
The elements of the array are referenced by indexing the
attribute by an integer between 1 and 25 (e.g., ``\fIPERSON.Name[4]\fP''
references the fourth character in the person's name).
.pp
A variable-size array is specified by omitting the upper bound in the
type constructor.
For example, a variable-sized array of characters
is specified by ``char[].''
Variable-size arrays are referenced by indexing the
attribute by an integer between 1 and the current upper bound
of the array.
The predefined function \fIsize\fP returns the current upper bound.
POSTGRES does not impose a limit on the size of a
variable-size array.
Built-in functions are provided to append arrays and to fetch
array slices.
For example, two character arrays can be appended using the
concatenate operator (``+'') and an array slice containing characters
2 through 15 in an attribute named \fIx\fP can be fetched by the 
expression ``x[2:15].''
.pp
The second type-constructor allows values of type procedure to be stored in
an attribute.
Procedure values are represented by a sequence of POSTQUEL commands.
The value of an attribute of type procedure is a relation
because that is what a \fBretrieve\fP command returns.
Moreover, the value may include tuples from different relations
(i.e., of different types) because a procedure composed of two 
\fBretrieve\fP commands returns the union of both commands.
We call a relation with different tuple types a \fImultirelation\fP.
The POSTGRES programming language interface provides a cursor-like
mechanism, called a \fIportal\fP, to fetch values from multirelations
\*([[StR86\*(]].
However, they are not stored by the system (i.e., only relations
are stored).
.pp
The system provides two kinds of procedure type-constructors: 
variable and parameterized.
A variable procedure-type allows a different POSTQUEL procedure
to be stored in each tuple while parameterized procedure-types
store the same procedure in each tuple but with different parameters.
We will illustrate the use of a variable procedure-type 
by showing another way to represent student majors.
Suppose a \fIDEPARTMENT\fP relation was defined with the following command:
.BE
\fBcreate\fP DEPARTMENT(Name = char[25],
	Chair = char[25], ...)
.EE
A student's major(s) can then be represented by a procedure in the \fISTUDENT\fP
relation that retrieves the appropriate \fIDEPARTMENT\fP tuple(s).
The \fIMajors\fP attribute would be declared as follows:
.BE
\fBcreate\fP STUDENT(..., Majors = postquel, ...)
.EE
Data type \fIpostquel\fP represents a procedure-type.
The value in \fIMajors\fP will be a query that fetches the
department relation tuples that represent the student's minors.
The following command appends a student to the database who has
a double major in
mathematics and computer science:
.BE
\fBappend\fP STUDENT( Name = ``Smith'', ...,
	Majors = 
		``\fBretrieve\fP (D.\fBall\fP)
		\ \fBfrom\fP D \fBin\fP DEPARTMENT
		\ \fBwhere\fP\ D.Name = ``Math''
		\ \ \ \ \ \ \fBor\fP\|\ D.Name = ``CS'''')
.EE
.pp
A query that references the \fIMajors\fP attribute 
returns the string that contains the POSTQUEL commands.
However, two notations are provided that will execute the query
and return the result rather than the definition.
First, nested-dot notation implicitly executes the query as illustrated by
.BE
\fBretrieve\fP (S.Name, S.Majors.Name)
\fBfrom\fP S \fBin\fP STUDENT
.EE
which prints a list of names and majors of students.
The result of the query in \fIMajors\fP is implicitly joined with the 
tuple specified by the rest of the target-list.
In other words, if a student has two majors, this query will return
two tuples with the \fIName\fP attribute repeated.
The implicit join is performed to guarantee that a relation is returned.
.pp
The second way to execute the query is to use the \fBexecute\fP
command.
For example, the query
.BE
\fBexecute\fP (S.Majors)
\fBfrom\fP S \fBin\fP STUDENT
\fBwhere\fP S.Name = ``Smith''
.EE
returns a relation that contains \fIDEPARTMENT\fP tuples
for all of Smith's majors.
.pp
Parameterized procedure-types are used when the query to be stored
in an attribute is nearly the same for every tuple.
The query parameters can be taken from other attributes in the tuple
or they may be explicitly specified.
For example, suppose an attribute in \fISTUDENT\fP
was to represent the student's current class list.
Given the following definition for enrollments:
.BE
\fBcreate\fP ENROLLMENT(Student = char[25],
	Class = char[25])
.EE
Bill's class list can be retrieved by the query
.BE
\fBretrieve\fP (ClassName = E.Class)
\fBfrom\fP E \fBin\fP ENROLLMENT
\fBwhere\fP E.Student = ``Bill''
.EE
This query will be the same for every student except for the 
constant that specifies the student's name.
.pp
A parameterized procedure-type could be defined to represent
this query as follows:
.BE
\fBdefine type\fP classes \fBis\fP
	\fBretrieve\fP (ClassName = E.Class)
	\fBfrom\fP E \fBin\fP ENROLLMENT
	\fBwhere\fP E.Student = $.Name
\fBend\fP
.EE
The dollar-sign symbol (``$'') refers to the tuple in which 
the query is stored (i.e., the current tuple).
The parameter for each instance of this type (i.e., a query)
is the \fIName\fP attribute in the tuple in which the instance is stored.
This type is then used in the \fBcreate\fP command as follows
.BE
\fBcreate\fP STUDENT(Name = char[25], ..., 
	ClassList = classes)
.EE
to define an attribute that represents the student's current class list.
This attribute can be used in a query
to return a list of students and the classes they are taking:
.BE
	\fBretrieve\fP (S.Name, S.ClassList.ClassName)
.EE
Notice that for a particular \fISTUDENT\fP tuple, the
expression ``\fI$.\|Name\fP'' in the query refers to the name of that student.
The symbol ``$'' can be thought of as a tuple-variable bound to
the current tuple.
.pp
Parameterized procedure-types are extremely useful types, but
sometimes it is inconvenient to store the parameters explicitly
as attributes in the relation.
Consequently, a notation is provided that allows the 
parameters to be stored in the procedure-type value.
This mechanism can be used to simulate attribute types that
reference tuples in other relations.
For example, suppose you wanted a type that referenced a tuple
in the \fIDEPARTMENT\fP relation defined above.
This type can be defined as follows:
.BE
\fBdefine type\fP DEPARTMENT(int4) \fBis\fP
	\fBretrieve\fP (DEPARTMENT.\fBall\fP)
	\fBwhere\fP DEPARTMENT.oid = $1
\fBend\fP
.EE
The relation name can be used for the type name
because relations, types, and procedures have separate name
spaces.
The query in type \fIDEPARTMENT\fP will retrieve a specific department
tuple given a unique object identifier (\fIoid\fP\|) of the
tuple.
Each relation has an implicitly defined attribute named \fIoid\fP
that contains the tuple's unique identifier.
The \fIoid\fP attribute can be accessed
but not updated by user queries.
\fIOid\fP values are created and maintained by the POSTGRES storage
system\*([<\*([[Sto87\*(]]\*(>].
The formal argument to this procedure-type is the type of an object
identifier.
The parameter is referenced inside the 
definition by ``\fI$n\fP'' where \fIn\fP is the parameter number.
.pp
An actual argument is supplied when a value is assigned to an
attribute of type \fIDEPARTMENT\fP.
For example, a \fICOURSE\fP relation can be defined that represents
information about a specific course including the department that
offers it.
The \fBcreate\fP command is:
.BE
\fBcreate\fP COURSE(Title = char[25],
	Dept = DEPARTMENT, ...)
.EE
The attribute \fIDept\fP represents the department that offers
the course.
The following query adds a course to the database:
.BE
\fBappend\fP COURSE(
	Title = ``Introductory Programming'',
	Dept = DEPARTMENT(D.oid))
\fBfrom\fP D \fBin\fP DEPARTMENT
\fBwhere\fP D.Name = ``computer science''
.EE
The procedure \fIDEPARTMENT\fP called in the target-list is 
implicitly defined by the ``\fBdefine type\fP'' command.
It constructs a value of the specified type given actual arguments
that are type compatible with the formal arguments, in this case an \fIint4\fP.
.pp
Parameterized procedure-types that represent references to tuples
in a specific relation are so commonly used that we plan to provide
automatic support for them.
First, every relation created will have a type that represents a reference
to a tuple implicitly defined similar to the \fIDEPARTMENT\fP 
type above.
And second, it will be possible to assign a tuple-variable directly
to a tuple reference attribute.
In other words, the assignment to the attribute \fIDept\fP that is 
written in the query above as
.BE
 ... Dept = DEPARTMENT(D.oid) ...
.EE
can be written as
.BE
 ... Dept = D ...
.EE
.pp
Parameterized procedure-types can also be used to implement a
type that references a tuple in an arbitrary relation.
The type definition is:
.BE
\fBdefine type\fP tuple(char[], int4) \fBis\fP
	\fBretrieve\fP ($1.all)
	\fBwhere\fP $1.oid = $2
\fBend\fP
.EE
The first argument is the name of the relation and the second argument
is the \fIoid\fP of the desired tuple in the relation.
In effect, this type defines a reference to an arbitrary tuple in the
database.
.pp
The procedure-type \fItuple\fP can be used to create a relation 
that represents people who help with fund raising:
.BE
\fBcreate\fP VOLUNTEER(Person = tuple,
	TimeAvailable = integer, ...)
.EE
Because volunteers may be students, employees, or people who are
neither students nor employees, the attribute \fIPerson\fP must 
contain a reference to a tuple in an arbitrary relation.
The following command appends all students to \fIVOLUNTEER\fP:
.BE
\fBappend\fP VOLUNTEER(
	Person = tuple(relation(S), S.oid))
\fBfrom\fP S \fBin\fP STUDENT*
.EE
The predefined function \fIrelation\fP returns the name of the relation
to which the tuple-variable \fIS\fP is bound.
.pp
The type \fItuple\fP will also be special-cased to make it more convenient.
\fITuple\fP will be a predefined type and it will be possible to
assign tuple-variables directly to attributes of the type.
Consequently, the assignment to \fIPerson\fP written above as
.BE
 ... Person = tuple(relation(S), S.oid) ...
.EE
can be written
.BE
 ... Person = S ...
.EE
We expect that as we get more experience with POSTGRES applications
that more types may be special-cased.
.sh 1 "User-Defined Procedures"
.pp
This section describes language constructs for
adding user-defined procedures to POSTQUEL.
User-defined procedures are written in a conventional programming
language and are used to implement ADT operators or to move a
computation from a front-end application process to the back-end
DBMS process.
.pp
Moving a computation to the back-end opens up possibilities for the
DBMS to precompute a query that includes the computation.
For example, suppose that a front-end application needed to fetch the
definition of a form from a database and to construct a main-memory
data structure that the run-time forms system used to display
the form on the terminal screen for data entry or display.
A conventional relation database design would store the form 
components (e.g., titles and field definitions for different types of 
fields such as scalar
fields, table fields, and graphics fields) in many different relations.
An example database design is:
.BE
\fBcreate\fP FORM(FormName, ...)
.sp 0.5v
\fBcreate\fP FIELDS(FormName, FieldName, 
	Origin, Height, Width, 
	FieldKind, ...)
.sp 0.5v
\fBcreate\fP SCALARFIELD(FormName,
	FieldName, DataType, 
	DisplayFormat, ...)
.sp 0.5v
\fBcreate\fP TABLEFIELD(FormName, 
	FieldName, NumberOfRows, ...)
.sp 0.5v
\fBcreate\fP TABLECOLUMNS(FormName, 
	FieldName, ColumnName, Height, 
	Width, FieldKind, ...)
.EE
The query that fetches the form from the database
must execute at least one query per table and
sort through the return tuples to construct the main-memory
data structure.
This operation must take less than two seconds for an interactive
application.
Conventional relational DBMS's cannot satisfy this time constraint.
.pp
Our approach to solving this problem is to move the computation that
constructs the main-memory data structure to the database process.
Suppose the procedure \fIMakeForm\fP built the data structure
given the name of a form.
Using the parameterized procedure-type mechanism defined above
an attribute can be added to the \fIFORM\fP relation that
stores the form representation computed by this procedure.
The commands
.BE
\fBdefine type\fP formrep \fBis\fP
	\fBretrieve\fP (rep = MakeForm($.FormName))
\fBend\fP
\fBaddattribute\fP (FormName, ..., 
	FormDataStructure = formrep) 
\fBto\fP FORM
.EE
define the procedure type and add an attribute to the \fIFORM\fP
relation.
.pp
The advantage of this representation is that POSTGRES can precompute
the answer to a procedure-type attribute and store it in the tuple.
By precomputing the main-memory data structure representation, the
form can be fetched from the database by a single-tuple retrieve:
.BE
\fBretrieve\fP (x = FORM.FormDataStructure)
\fBwhere\fP FORM.FormName = ``foo''
.EE
The real-time constraint to fetch and display a form can be easily
met if all the program must do is a single-tuple retrieve 
to fetch the data structure and call the library procedure to display it.
This example illustrates the advantage of moving a computation
(i.e., constructing a main-memory data structure) from
the application process to the DBMS process.
.pp
A procedure is defined to the system by specifying the names and
types of the arguments, the return type, the language it is
written in, and where the source and object code is stored.
For example, the definition
.BE
\fBdefine procedure\fP AgeInYears(date) \fBreturns\fP int4
	\fBis\fP (language = ``C'', filename = ``AgeInYears'')
.EE
defines a procedure \fIAgeInYears\fP that takes a \fIdate\fP value
and returns the age of the person.
The argument and return types are specified using POSTGRES types.
When the procedure is called, it is passed the arguments in the POSTGRES
internal representation for the type.
We plan to allow procedures to be written in several different
languages including C and Lisp which are the two languages being
used to implement the system.
.pp
POSTGRES stores the information about a procedure in the
system catalogs and dynamically loads the object code 
when it is called in a query.
The following query uses the \fIAgeInYears\fP procedure to 
retrieve the names and ages of all people in the example database:
.BE
\fBretrieve\fP (P.Name, 
		Age = AgeInYears(P.Birthdate))
\fBfrom\fP P \fBin\fP PERSON*
.EE
.pp
User-defined procedures can also take tuple-variable arguments.
For example, the following command defines a procedure, called
\fIComp\fP, that takes an EMPLOYEE tuple and computes the person's
compensation according to some formula that involves 
several attributes in the tuple (e.g.,
the employee's status, job title, and salary):
.BE
\fBdefine procedure\fP Comp(EMPLOYEE) 
	\fBreturns\fP int4 \fBis\fP (language = ``C'',
	filename = ``Comp1'')
.EE
Recall that a parameterized procedure-type is defined for each
relation automatically so the type \fIEMPLOYEE\fP
represents a reference to a tuple in the \fIEMPLOYEE\fP relation.
This procedure is called in the following query:
.BE
\fBretrieve\fP (E.Name, Compensation = Comp(E))
\fBfrom\fP E \fBin\fP EMPLOYEE
.EE
The C function that implements this procedure is passed a data
structure that contains the names, types, and values of the attributes
in the tuple.
.pp
User-defined procedures can be passed tuples in other relations
that inherit the attributes in the relation declared as the
argument to the procedure.
For example, the \fIComp\fP procedure defined for 
the \fIEMPLOYEE\fP relation can be passed a \fISTUDEMP\fP tuple as in 
.BE
\fBretrieve\fP (SE.Name, 
		Compensation = Comp(SE))
\fBfrom\fP SE \fBin\fP STUDEMP
.EE
because \fISTUDEMP\fP inherits data attributes from 
\fIEMPLOYEE\fP.
.pp
The arguments to procedures that take relation tuples as arguments
must be passed in a self-describing
data structure because the procedure can be passed tuples from different 
relations.
Attributes inherited from other relations
may be in different positions in the relations.
Moreover, the values passed for the same attribute name 
may be different types (e.g., the definition of an inherited attribute 
may be overridden with a different type).
The self-describing data structure is a list of arguments, one
per attribute in the tuple to be passed, with the following structure
.BE
(AttrName, AttrType, AttrValue)
.EE
The procedure code will have to search the list to find the desired
attribute.
A library of routines is provided that will hide this structure
from the programmer.
The library will include routines to get the type and value of an
attribute given the name of the attribute.
For example, the following code fetches the value of the
\fIBirthdate\fP attribute:
.BE
GetValue(``Birthdate'')
.EE
The problem of variable argument lists arises in all 
object-oriented programming languages and
similar solutions are used.
.pp
The model for procedure inheritance is nearly identical to
method inheritance in object-oriented programming languages
\*([[StB86\*(]].
Procedure inheritance uses the data inheritance hierarchy and
similar inheritance rules
except that a rule is provided to select a procedure when
an inheritance conflict arises.
For example, suppose that a \fIComp\fP procedure was defined for
\fISTUDENT\fP as well as for \fIEMPLOYEE\fP.
The definition of the second procedure might be:
.BE
\fBdefine procedure\fP Comp(STUDENT)
	\fBreturns\fP int4 \fBis\fP (language = ``C'',
	filename = ``Comp2'')
.EE
A conflict arises when the query on \fISTUDEMP\fP above is executed because
the system does not know which \fIComp\fP procedure to call (i.e.,
the one for \fIEMPLOYEE\fP or 
the one for \fISTUDENT\fP\|).
The procedure called is selected from among the procedures
that take a tuple from the relation specified by the actual argument
\fISTUDEMP\fP or any relation from which attributes
in the actual argument are inherited (e.g., \fIPERSON\fP, \fIEMPLOYEE\fP,
and \fISTUDENT\fP\|).
.pp
Each relation has an \fIinheritance precedence list\fP (IPL)
that is used to resolve the conflict.
The list is constructed by 
starting with the relation itself and doing a depth-first search up the 
inheritance hierarchy starting with the first relation specified in 
the \fBinherits\fP-clause.
For example, the \fBinherits\fP-clause for \fISTUDEMP\fP is
.BE
 ... \fBinherits\fP (STUDENT, EMPLOYEE)
.EE
and its IPL is
.BE
(STUDEMP, STUDENT,
\ EMPLOYEE, PERSON)
.EE
\fIPERSON\fP appears after \fIEMPLOYEE\fP rather than after 
\fISTUDENT\fP where it
would appear in a depth-first search because both \fISTUDENT\fP and
\fIEMPLOYEE\fP inherit attributes from \fIPERSON\fP (see figure 1).
In other words, all but the last occurrence of a relation in
the depth-first ordering of the hierarchy is deleted.\**
.(f
\** We are using a rule that is similar to the rule for the new Common Lisp
object model\*([<\*([[Boe86\*(]]\*(>].
It is actually slightly more complicated than described here in order to 
eliminate some nasty cases that arise when there are cycles
in the inheritance hierarchy.
.)f
.pp
When a procedure is called and passed a tuple as the first argument,
the actual procedure invoked is the first definition found with
the same name when the
procedures that take arguments from the relations in the ILP of the argument
are searched in order.
In the example above, the \fPComp\fP procedure defined for \fISTUDENT\fP 
is called because there is no procedure named \fIComp\fP defined for
\fISTUDEMP\fP and \fISTUDENT\fP is the next relation in the IPL.
.pp
The implementation of this procedure selection rule is relatively easy.
Assume that two system catalogs are defined:
.BE
PROCDEF(ProcName, ArgName, ProcId)
IPL(RelationName, IPLEntry, SeqNo)
.EE
where \fIPROCDEF\fP has an entry for each procedure defined and
\fIIPL\fP maintains the precedence lists for all relations.
The attributes in \fIPROCDEF\fP represent the procedure name, the argument type
name, and the unique identifier for the procedure code
stored in another catalog.
The attributes in \fIIPL\fP represent the relation, an IPL entry for the
relation, and the sequence number for that entry in the IPL of the relation.
With these two catalogs, the query to find the correct procedure for
the call
.BE
Comp(STUDEMP)
.EE
is\**
.(f
\** This query uses a QUEL-style aggregate function.
.)f
.BE
\fBretrieve\fP (P.ProcId)
\fBfrom\fP P \fBin\fP PROCDEF, I \fBin\fP IPL
\fBwhere\fP P.ProcName = ``Comp''
  \fBand\fP I.RelationName = ``STUDEMP''
  \fBand\fP I.IPLEntry = P.ArgName
  \fBand\fP I.SeqNo = MIN(I.SeqNo 
	\fBby\fP I.RelationName
	\fBwhere\fP\ \ I.IPLEntry = P.ArgName
          \fBand\fP P.ProcName = ``Comp''
          \fBand\fP I.RelationName = ``STUDEMP'')
.EE
This query can be precomputed to speed up procedure selection.
.pp
In summary, the major changes required to support procedure inheritance
is 1) allow tuples as arguments to procedures, 2) define a representation
for variable argument lists, and 3) implement a procedure selection
mechanism.
This extension to the relational model is relatively straightforward
and only requires a small number of changes to the DBMS implementation.
.sh 1 "Other Data Models"
.pp
This section compares the POSTGRES data model to 
semantic, functional, and object-oriented data models.
.pp
Semantic and functional data models 
\*([[Dae85\*(],HaM81\*(],Mye80\*(],Shi81\*(],SmS77\*(],Zan83\*(]]
do not provide the flexibility provided by the model described here.
They cannot easily represent data with uncertain structure
(e.g., objects with shared subobjects that have different types).
.pp
Modeling ideas oriented toward complex objects\*([<\*([[HaL82\*(],LoP83\*(]]\*(>]
cannot deal with objects that have a variety of shared subobjects.  
POSTGRES uses procedures to represent shared subobjects
which does not have limitation on the types of subobjects
that are shared.  
Moreover, the nested-dot notation allows convenient access to selected 
subobjects, a feature not present in these systems.
.pp
Several proposals have been made to support data models
that contain non-first normal form relations 
\*([[Bae86\*(],Dae86\*(],Dee86\*(]].
The POSTGRES data model can be used to support non-first normal form
relations with procedure-types.  
Consequently, POSTGRES seems to contain a
superset of the capabilities of these proposals. 
.pp
Object-oriented data models\*([<\*([[Ane86\*(],CoM84\*(]]\*(>]
have modeling constructs to deal with uncertain structure.
For example, GemStone supports union types which can be used to
represent subobjects that have different types\*([<\*([[CoM84\*(]]\*(>].
Sharing of subobjects is represented by storing the subobjects 
as separate records and connecting them to a parent object with 
pointer-chains.
Precomputed procedure values will, in our opinion, make POSTGRES 
performance competitive with pointer-chain proposals.
The preformance problem with pointer-chains will be most obvious
when an object is composed of a large number of subobjects.
POSTGRES will avoid this problem because the pointer-chain is represented
as a relation and the system can use all of the query processing and 
storage structure techniques available in the system to represent it.
Consequently, POSTGRES uses a different approach that supports the
same modeling capabilities and an implementation that may have
better performance.
.pp
Finally, the POSTGRES data model could claim to be object-oriented, though we
prefer not to use this word because few people agree on exactly what
it means.
The data model provides the same capabilities as an object-oriented model,
but it does so without discarding the relational model and without having
to introduce a new confusing terminology.
.sh 1 "Summary"
.pp
The POSTGRES data model uses the ideas of abstract data types,
data of type procedure, and inheritance to extend the relational model.
These ideas can be used to simulate a variety of semantic data modeling
concepts (e.g., aggregation and generalization).
In addition, the same ideas can be used to support complex objects that
have unpredicatable composition and shared subobjects.
.sp 2
.(l C
.sz \n(sp
.b References
.)l
.sp
.[]
.[-
.ds [F AdL80
.ds [A M\*(p]\*(a]E\*(p] Adiba
.as [A \*(n]B\*(p]\*(a]G\*(p] Lindsay
.ds [T Database Snapshots
.ds [J Proc. 6th Int. Conf. on Very Large Databases
.ds [C Montreal, Canada
.ds [D Oct. 1980
.nr [P 1
.ds [P 86-91
.][
.[-
.ds [F Ane86
.ds [A T\*(p] Anderson
.as [A \*(n]et.\ al.
.ds [T PROTEUS: Objectifying the DBMS User Interface
.ds [J Proc. Int. Wkshp on Object-Oriented Database Systems
.ds [C Asilomar, CA
.ds [D Sep. 1986
.][
.[-
.ds [F Bae86
.ds [A D\*(p] Batory
.as [A \*(n]et.al.
.ds [T GENESIS: A Reconfigurable Database Management System
.ds [R Tech. Rep. 86-07
.ds [I Dept. of Comp. Sci., Univ. of Texas at Austin
.ds [D 1986
.][
.[-
.ds [F Boe86
.ds [A D\*(p]\*(a]B\*(p] Bobrow
.as [A \*(n]et.al.
.ds [T COMMONLOOPS: Merging Lisp and Object-Oriented Programming
.ds [J Proc. 1986 ACM OOPSLA Conf.
.ds [C Portland, OR
.ds [D Sep. 1986
.nr [P 1
.ds [P 17-29
.][
.[-
.ds [F Cod70
.ds [A E\*(p]\*(a]F\*(p] Codd
.ds [T A Relational Model of Data for Large Shared Data Bases
.ds [J Comm. of the ACM
.ds [D JUNE 1970
.][
.[-
.ds [F CoM84
.ds [A G\*(p] Copeland
.as [A \*(n]D\*(p] Maier
.ds [T Making Smalltalk a Database System
.ds [J Proc. 1984 ACM-ACM-S\&IGMOD Conf. on Management of Data Int. Conf. on the Mgt. of Data
.ds [D June 1984
.][
.[-
.ds [F Dae86
.ds [A P\*(p] Dadam
.as [A \*(n]et.al.
.ds [T A DBMS Prototype to Support Extended NF2 Relations: An Integrated View on Flat Tables and Hierarchies
.ds [J Proc. ACM-ACM-S\&IGMOD Conf. on Management of Data Conf. on Mgt. of Data
.ds [C Washington, DC
.ds [D May 1986
.][
.[-
.ds [F Dae85
.ds [A U\*(p] Dayal
.as [A \*(n]et.al.
.ds [T A Knowledge-Oriented Database Management System
.ds [J Proc. Islamorada Conference on Large Scale Knowledge Base and Reasoning Systems
.ds [D Feb. 1985
.][
.[-
.ds [F Dee86
.ds [A U\*(p] Deppisch
.as [A \*(n]et.al.
.ds [T A Storage System for Complex Objects
.ds [J Proc. Int. Wkshp on Object-Oriented Database Systems
.ds [C Asilomar, CA
.ds [D Sep. 1986
.][
.[-
.ds [F Gae84
.ds [A H\*(p] Garcia-Molina
.as [A \*(n]et.al.
.ds [T DataPatch: Integrating Inconsistent Copies of a Database after a Partition
.ds [R Tech. Rep. Tech. Rep.# 304
.ds [I Dept. Elec. Eng. and Comp. Sci.
.ds [C Princeton, NJ
.ds [D 1984
.][
.[-
.ds [F HaM81
.ds [A M\*(p] Hammer
.as [A \*(n]D\*(p] McLeod
.ds [T Database Description with SDM
.ds [J ACM-Trans. Database Systems
.ds [D Sep. 1981
.][
.[-
.ds [F HaL82
.ds [A R\*(p] Haskins
.as [A \*(n]R\*(p] Lorie
.ds [T On Extending the Functions of a Relational Database System
.ds [J Proc. 1982 ACM-ACM-S\&IGMOD Conf. on Management of Data Conference on Management of Data
.ds [C Orlando, FL
.ds [D JUNE 1982
.][
.[-
.ds [F HSW75
.ds [A G\*(p] Held
.as [A \*(c]M\*(p]\*(a]R\*(p] Stonebraker
.as [A \*(m]E\*(p] Wong
.ds [T INGRES -- A Relational Data Base System
.ds [J Proc. AFIPS AFIPS Nat. Computer Conf.
.ds [D 1975
.nr [P 1
.ds [P 409-416
.][
.[-
.ds [F Kue84
.ds [A R\*(p] Kung
.as [A \*(n]et.al.
.ds [T Heuristic Search in Database Systems
.ds [J Proc. 1st International Workshop on Expert Data Bases
.ds [C Kiowah, SC
.ds [D Oct. 1984
.][
.[-
.ds [F LoP83
.ds [A R\*(p] Lorie
.as [A \*(n]W\*(p] Plouffee
.ds [T Complex Objects and Their Use in Design Transactions
.ds [J Proc. Engineering Design Applications Stream of ACM-IEEE Data Base Week
.ds [C San Jose, CA
.ds [D May 1983
.][
.[-
.ds [F Mye80
.ds [A J\*(p] Myloupoulis
.as [A \*(n]et.al.
.ds [T A Language Facility for Designing Database Intensive Applications
.ds [J ACM-Trans. Database Systems
.ds [D JUNE 1980
.][
.[-
.ds [F Row86
.ds [A L\*(p]\*(a]A\*(p] Rowe
.ds [T A Shared Object Hierarchy
.ds [J Proc. Int. Wkshp on Object-Oriented Database Systems
.ds [C Asilomar, CA
.ds [D Sep. 1986
.][
.[-
.ds [F Shi81
.ds [A D\*(p] Shipman
.ds [T The Functional Model and the Data Language Daplex
.ds [J ACM-Trans. Database Systems
.ds [D Mar. 1981
.][
.[-
.ds [F SmS77
.ds [A J\*(p] Smith
.as [A \*(n]D\*(p] Smith
.ds [T Database Abstractions: Aggregation and Generalization
.ds [J ACM Trans. Database Systems
.ds [D JUNE 1977
.][
.[-
.ds [F StB86
.ds [A M\*(p] Stefik
.as [A \*(n]D\*(p]\*(a]G\*(p] Bobrow
.ds [T Object-Oriented Programming: Themes and Variations
.ds [J The AI Magazine
.ds [V 6
.ds [N 4
.ds [D Winter 1986
.nr [P 1
.ds [P 40-62
.][
.[-
.ds [F Ste84
.ds [A M\*(p]\*(a]R\*(p] Stonebraker
.as [A \*(n]et.\ al.
.ds [T QUEL as a Data Type
.ds [J Proc. 1984 ACM-ACM-S\&IGMOD Conf. on Management of Data Conf. on the Mgt. of Data
.ds [D May 1984
.][
.[-
.ds [F Sto85
.ds [A M\*(p]\*(a]R\*(p] Stonebraker
.ds [T Triggers and Inference in Data Base Systems
.ds [J Proc. Islamorada Conference on Large Scale Knowledge Base and Reasoning Systems
.ds [D Feb. 1985
.][
.[-
.ds [F StR86
.ds [A M\*(p]\*(a]R\*(p] Stonebraker
.as [A \*(n]L\*(p]\*(a]A\*(p] Rowe
.ds [T The Design of POSTGRES
.ds [J Proc. 1986 ACM-ACM-S\&IGMOD Conf. on Management of Data Int. Conf. on the Mgt. of Data
.ds [D June 1986
.][
.[-
.ds [F Sto86a
.ds [A M\*(p]\*(a]R\*(p] Stonebraker
.ds [T Object Management in POSTGRES Using Procedures
.ds [J Proc. Int. Wkshp on Object-Oriented Database Systems
.ds [C Asilomar, CA
.ds [D Sep. 1986
.][
.[-
.ds [F Sto86b
.ds [A M\*(p]\*(a]R\*(p] Stonebraker
.ds [T Inclusion of New Types in Relational Data Base Systems
.ds [J Proc. Second Int. Conf. on Data Base Eng.
.ds [C Los Angeles, CA
.ds [D Feb. 1986
.][
.[-
.ds [F Sto87
.ds [A M\*(p]\*(a]R\*(p] Stonebraker
.ds [T POSTGRES Storage System
.ds [R Submitted for publication
.ds [D 1987
.][
.[-
.ds [F Zan83
.ds [A C\*(p] Zaniola
.ds [T The Database Language GEM
.ds [J Proc. 1983 ACM-ACM-S\&IGMOD Conf. on Management of Data Conference on Management of Data
.ds [C San Jose, CA.
.ds [D May 1983
.][
