casacore
Loading...
Searching...
No Matches
UDFBase.h
Go to the documentation of this file.
1//# UDFBase.h: Abstract base class for a user-defined TaQL function
2//# Copyright (C) 2010
3//# Associated Universities, Inc. Washington DC, USA.
4//#
5//# This library is free software; you can redistribute it and/or modify it
6//# under the terms of the GNU Library General Public License as published by
7//# the Free Software Foundation; either version 2 of the License, or (at your
8//# option) any later version.
9//#
10//# This library is distributed in the hope that it will be useful, but WITHOUT
11//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13//# License for more details.
14//#
15//# You should have received a copy of the GNU Library General Public License
16//# along with this library; if not, write to the Free Software Foundation,
17//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18//#
19//# Correspondence concerning AIPS++ should be addressed as follows:
20//# Internet email: aips2-request@nrao.edu.
21//# Postal address: AIPS++ Project Office
22//# National Radio Astronomy Observatory
23//# 520 Edgemont Road
24//# Charlottesville, VA 22903-2475 USA
25//#
26//# $Id: UDFBase.h 21262 2012-09-07 12:38:36Z gervandiepen $
27
28#ifndef TABLES_UDFBASE_H
29#define TABLES_UDFBASE_H
30
31//# Includes
32#include <casacore/casa/aips.h>
33#include <casacore/tables/TaQL/ExprNodeRep.h>
34#include <casacore/tables/Tables/Table.h>
35#include <casacore/tables/TaQL/TaQLStyle.h>
36#include <casacore/casa/Containers/Record.h>
37#include <casacore/casa/Containers/Block.h>
38#include <casacore/casa/stdmap.h>
39
40
41namespace casacore {
42
43 // <summary>
44 // Abstract base class for a user-defined TaQL function
45 // </summary>
46 //
47 // <synopsis>
48 // This class makes it possible to add user-defined functions (UDF) to TaQL.
49 // A UDF has to be implemented in a class derived from this class and can
50 // contain one or more user-defined functions.
51 // <br>A few functions have to be implemented in the class as described below.
52 // In this way TaQL can be extended with arbitrary functions, which can be
53 // normal functions as well as aggregate functions (often used with GROUPBY).
54 //
55 // A UDF is a class derived from this base class. It must contain the
56 // following member functions. See also the example below.
57 // <table border=0>
58 // <tr>
59 // <td><src>makeObject</src></td>
60 // <td>a static function to create an object of the UDF class. This function
61 // needs to be registered.
62 // </td>
63 // </tr>
64 // <tr>
65 // <td><src>setup</src></td>
66 // <td>this virtual function is called after the object has been created.
67 // It should initialize the object using the function arguments that
68 // can be obtained using the function <src>operands()</src>. The setup
69 // function should perform the following:
70 // <ul>
71 // <li>Define the data type of the result using <src>setDataType</src>.
72 // The data type should be derived from the data types of the function
73 // arguments. The possible data types are defined in class
74 // TableExprNodeRep.
75 // Note that a UDF can support multiple data types. For example, a
76 // function like <src>min</src> can be used for Int, Double, or a mix.
77 // Function 'checkDT' in class TableExprNodeMulti can be used to
78 // check the data types of the operands and determine the result
79 // data type.
80 // <li>Define if the function is an aggregate function calculating
81 // an aggregated value in a group (e.g., minimum or mean).
82 // <src>setAggregate</src> can be used to tell so.
83 // <li>Define the dimensionality of the result using <src>setNDim</src>.
84 // A value of 0 means a scalar. A value of -1 means an array with
85 // a dimensionality that can vary from row to row.
86 // <li>Optionally use <src>setShape</src> to define the shape if the
87 // results are arrays with a shape that is the same for all rows.
88 // It will also set ndim if setNDim was not used yet, otherwise
89 // it checks if ndim matches.
90 // <li>Optionally set the unit of the result using <src>setUnit</src>.
91 // TaQL has full support of units, so UDFs should behave the same.
92 // It is possible to change the unit of the function arguments.
93 // For example:
94 // <ul>
95 // <li>a function like 'sin' can force its argument to be
96 // in radians; TaQL will scale the argument as needed. This can be
97 // done like
98 // <src>TableExprNodeUnit::adaptUnit (operands()[i], "rad");</src>
99 // <li>A function like 'asin' will have a result in radians.
100 // Such a UDF should set its result unit to rad.
101 // <li>A function like 'min' wants its arguments to have the same
102 // unit and will set its result unit to it. It can be done like:
103 // <src>setUnit (TableExprFuncNode::makeEqualUnits
104 // (operands(), 0, operands().size()));</src>
105 // </ul>
106 // See class TableExprFuncNode for more info about these functions.
107 // <li>Optionally define attributes as a Record object. They can be used
108 // by UDFs to tell something more about the type of value.
109 // <li>Optionally define if the result is a constant value using
110 // <src>setConstant</src>. It means that the function is not
111 // dependent on the row number in the table being queried.
112 // This is usually the case if all UDF arguments are constant.
113 // </ul>
114 // </td>
115 // </tr>
116 // <tr>
117 // <td><src>getXXX</src></td>
118 // <td>these are virtual get functions for each possible data type. The
119 // get functions matching the data types set by the setup
120 // function need to be implemented.
121 // The <src>get</src> functions have an argument TableExprId
122 // defining the table row (or record) for which the function has
123 // to be evaluated.
124 // If the UDF is an aggregate function, the TableExprId has to be
125 // cast to a TableExprIdAggr object from which all TableExprId
126 // objects in an aggregation group can be retrieved.
127 // <srcblock>
128 // const TableExprIdAggr& aid = TableExprIdAggr::cast (id);
129 // const vector<TableExprId>& ids = aid.result().ids(id.rownr());
130 // </srcblock>
131 // </td>
132 // </tr>
133 // </table>
134 //
135 // A UDF has to be made known to TaQL by adding it to the UDF registry with
136 // its name and 'makeObject' function.
137 // UDFs will usually reside in a shared library that is loaded dynamically.
138 // TaQL will load a UDF in the following way:
139 // <ul>
140 // <li> The UDF name used in TaQL consists of two parts: a library name
141 // and a function name separated by a dot. Both parts need to be given.
142 // Note that the library name can also be seen as a UDF scope, so
143 // different UDFs with equal names can be used from different libraries.
144 // A UDF should be registered with this full name.
145 // <br>The "USING STYLE" clause can be used to define a synonym for
146 // a (long) library name in the TaQLStyle object. The library part
147 // of the UDF will always be looked up in this synonym map.
148 // <li> If a UDF is not found in the registry, an attempt is made to load
149 // a shared library using the library name part. The libraries tried
150 // are lib<library>.so and libcasa_<library>.so.
151 // On Mac .dylib will be tried. If loaded successfully, a special
152 // function 'register_libname' will be called first. It should
153 // register each UDF in the shared library using UDFBase::registerUDF.
154 // </ul>
155 // </synopsis>
156 //
157 // <example>
158 // The following example shows a normal UDF function.
159 // <br>It returns True if the function argument matches 1.
160 // It can be seen that it checks if the argument is an integer scalar.
161 // <srcblock>
162 // class TestUDF: public UDFBase
163 // {
164 // public:
165 // TestUDF() {}
166 // // Registered function to create the UDF object.
167 // // The name of the function is not important here.
168 // static UDFBase* makeObject (const String&)
169 // { return new TestUDF(); }
170 // // Setup and check the details; result is a bool scalar value.
171 // virtual void setup (const Table&, const TaQLStyle&)
172 // {
173 // AlwaysAssert (operands().size() == 1, AipsError);
174 // AlwaysAssert (operands()[0]->dataType() == TableExprNodeRep::NTInt,
175 // AipsError);
176 // AlwaysAssert (operands()[0]->valueType() == TableExprNodeRep::VTScalar,
177 // AipsError);
178 // setDataType (TableExprNodeRep::NTBool);
179 // setNDim (0); // scalar result
180 // setConstant (operands()[0]->isConstant()); // constant result?
181 // }
182 // // Get the value for the given id.
183 // // It gets the value of the operand and checks if it is 1.
184 // Bool getBool (const TableExprId& id)
185 // { return operands()[0]->getInt(id) == 1; }
186 // };
187 // </srcblock>
188 // </example>
189
190 // <example>
191 // The following example shows an aggregate UDF function.
192 // It calculates the sum of the cubes of the values in a group.
193 // <srcblock>
194 // class TestUDFAggr: public UDFBase
195 // {
196 // public:
197 // TestUDFAggr() {}
198 // // Registered function to create the UDF object.
199 // // The name of the function is not important here.
200 // static UDFBase* makeObject (const String&) { return new TestUDFAggr(); }
201 // // Setup and check the details; result is an integer scalar value.
202 // // It aggregates the values of multiple rows.
203 // virtual void setup (const Table&, const TaQLStyle&)
204 // {
205 // AlwaysAssert (operands().size() == 1, AipsError);
206 // AlwaysAssert (operands()[0]->dataType() == TableExprNodeRep::NTInt, AipsError);
207 // AlwaysAssert (operands()[0]->valueType() == TableExprNodeRep::VTScalar, AipsError);
208 // setDataType (TableExprNodeRep::NTInt);
209 // setNDim (0); // scalar
210 // setAggregate (True); // aggregate function
211 // }
212 // // Get the value of a group.
213 // // It aggregates the values of multiple rows.
214 // Int64 getInt (const TableExprId& id)
215 // {
216 // // Cast the id to a TableExprIdAggr object.
217 // const TableExprIdAggr& aid = TableExprIdAggr::cast (id);
218 // // Get the vector of ids for this group.
219 // const vector<TableExprId>& ids = aid.result().ids(id.rownr());
220 // // Get the values for all ids and accumulate them.
221 // Int64 sum3 = 0;
222 // for (vector<TableExprId>::const_iterator it=ids.begin();
223 // it!=ids.end(); ++it){
224 // Int64 v = operands()[0]->getInt(*it);
225 // sum3 += v*v*v;
226 // }
227 // return sum3;
228 // }
229 // };
230 // </srcblock>
231 // </example>
232 // More examples of UDF functions can be found in classes UDFMSCal
233 // and DirectionUDF.
234
236 {
237 public:
238 // The signature of a global or static member function creating an object
239 // of the UDF.
240 typedef UDFBase* MakeUDFObject (const String& functionName);
241
242 // Only default constructor is needed.
244
245 // Destructor.
246 virtual ~UDFBase();
247
248 // Evaluate the function and return the result.
249 // Their default implementations throw a "not implemented" exception.
250 // <group>
251 virtual Bool getBool (const TableExprId& id);
252 virtual Int64 getInt (const TableExprId& id);
253 virtual Double getDouble (const TableExprId& id);
254 virtual DComplex getDComplex (const TableExprId& id);
255 virtual String getString (const TableExprId& id);
256 virtual TaqlRegex getRegex (const TableExprId& id);
257 virtual MVTime getDate (const TableExprId& id);
264 // </group>
265
266 // Get the unit.
267 const String& getUnit() const
268 { return itsUnit; }
269
270 // Get the attributes.
271 const Record& getAttributes() const
272 { return itsAttributes; }
273
274 // Get the nodes in the function operands representing an aggregate function.
275 void getAggrNodes (vector<TableExprNodeRep*>& aggr);
276
277 // Get the nodes in the function operands representing a table column.
278 void getColumnNodes (vector<TableExprNodeRep*>& cols);
279
280 private:
281 // Set up the function object.
282 virtual void setup (const Table& table,
283 const TaQLStyle&) = 0;
284
285 protected:
286 // Get the operands.
287 std::vector<TENShPtr>& operands()
288 { return itsOperands; }
289
290 // Set the data type.
291 // This function must be called by the setup function of the derived class.
293
294 // Set the dimensionality of the results.
295 // <br> 0 means that the results are scalars.
296 // <br> -1 means that the results are arrays with unknown dimensionality.
297 // <br> >0 means that the results are arrays with that dimensionality.
298 // This function must be called by the setup function of the derived class.
300
301 // Set the shape of the results if it is fixed and known.
302 void setShape (const IPosition& shape);
303
304 // Set the unit of the result.
305 // If this function is not called by the setup function of the derived
306 // class, the result has no unit.
307 void setUnit (const String& unit);
308
309 // Set the attributes of the result.
310 // If this function is not called by the setup function of the derived
311 // class, the result has no attributes.
312 void setAttributes (const Record& attributes);
313
314 // Define if the result is constant (e.g. if all arguments are constant).
315 // If this function is not called by the setup function of the derived
316 // class, the result is not constant.
318
319 // Define if the UDF is an aggregate function (usually used in GROUPBY).
321
322 // Let a derived class recreate its column objects in case a selection
323 // has to be applied.
324 // The default implementation does nothing.
325 virtual void recreateColumnObjects (const Vector<rownr_t>& rownrs);
326
327 public:
328 // Register the name and construction function of a UDF (thread-safe).
329 // An exception is thrown if this name already exists with a different
330 // construction function.
331 static void registerUDF (const String& name, MakeUDFObject* func);
332
333 // Initialize the function object.
334 void init (const std::vector<TENShPtr>& arg,
335 const Table& table, const TaQLStyle&);
336
337 // Get the data type.
340
341 // Get the dimensionality of the results.
342 // (0=scalar, -1=array with variable ndim, >0=array with fixed ndim)
343 Int ndim() const
344 { return itsNDim; }
345
346 // Get the result shape if the same for all results.
347 const IPosition& shape() const
348 { return itsShape; }
349
350 // Tell if the UDF gives a constant result.
352 { return itsIsConstant; }
353
354 // Tell if the UDF is an aggregate function.
356 { return itsIsAggregate; }
357
358 // Do not apply the selection.
361
362 // If needed, let the UDF re-create column objects for a selection of rows.
363 // It calls the function recreateColumnObjects.
364 void applySelection (const Vector<rownr_t>& rownrs);
365
366 // Create a UDF object (thread-safe).
367 // It looks in the map with fixed function names. If unknown,
368 // it looks if a wildcarded function name is supported (for PyTaQL).
369 static UDFBase* createUDF (const String& name, const TaQLStyle& style);
370
371 private:
372 //# Data members.
373 std::vector<TENShPtr> itsOperands;
382 //# The registry is used for two purposes:
383 //# 1. It is a map of known function names (lib.func) to funcptr.
384 //# Function name * means that the library can contain any function,
385 //# which is intended for python functions (through PyTaQL).
386 //# 2. The loaded libraries are kept in the map (with 0 funcptr).
387 static map<String, MakeUDFObject*> theirRegistry;
388 static std::recursive_mutex theirMutex;
389 };
390
391} // end namespace
392
393#endif
String: the storage and methods of handling collections of characters.
Definition String.h:225
NodeDataType
Define the data types of a node.
virtual MArray< MVTime > getArrayDate(const TableExprId &id)
virtual MArray< Int64 > getArrayInt(const TableExprId &id)
virtual Double getDouble(const TableExprId &id)
const Record & getAttributes() const
Get the attributes.
Definition UDFBase.h:271
const String & getUnit() const
Get the unit.
Definition UDFBase.h:267
IPosition itsShape
Definition UDFBase.h:376
TableExprNodeRep::NodeDataType itsDataType
Definition UDFBase.h:374
void setAttributes(const Record &attributes)
Set the attributes of the result.
std::vector< TENShPtr > & operands()
Get the operands.
Definition UDFBase.h:287
void setDataType(TableExprNodeRep::NodeDataType)
Set the data type.
virtual Int64 getInt(const TableExprId &id)
TableExprNodeRep::NodeDataType dataType() const
Get the data type.
Definition UDFBase.h:338
virtual MVTime getDate(const TableExprId &id)
void init(const std::vector< TENShPtr > &arg, const Table &table, const TaQLStyle &)
Initialize the function object.
Bool itsApplySelection
Definition UDFBase.h:381
virtual TaqlRegex getRegex(const TableExprId &id)
UDFBase * MakeUDFObject(const String &functionName)
The signature of a global or static member function creating an object of the UDF.
Definition UDFBase.h:240
void applySelection(const Vector< rownr_t > &rownrs)
If needed, let the UDF re-create column objects for a selection of rows.
Int ndim() const
Get the dimensionality of the results.
Definition UDFBase.h:343
void setAggregate(Bool isAggregate)
Define if the UDF is an aggregate function (usually used in GROUPBY).
Bool isAggregate() const
Tell if the UDF is an aggregate function.
Definition UDFBase.h:355
void setUnit(const String &unit)
Set the unit of the result.
virtual void recreateColumnObjects(const Vector< rownr_t > &rownrs)
Let a derived class recreate its column objects in case a selection has to be applied.
std::vector< TENShPtr > itsOperands
Definition UDFBase.h:373
virtual MArray< DComplex > getArrayDComplex(const TableExprId &id)
UDFBase()
Only default constructor is needed.
static UDFBase * createUDF(const String &name, const TaQLStyle &style)
Create a UDF object (thread-safe).
virtual Bool getBool(const TableExprId &id)
Evaluate the function and return the result.
virtual DComplex getDComplex(const TableExprId &id)
void setConstant(Bool isConstant)
Define if the result is constant (e.g.
const IPosition & shape() const
Get the result shape if the same for all results.
Definition UDFBase.h:347
void getAggrNodes(vector< TableExprNodeRep * > &aggr)
Get the nodes in the function operands representing an aggregate function.
void setNDim(Int ndim)
Set the dimensionality of the results.
void getColumnNodes(vector< TableExprNodeRep * > &cols)
Get the nodes in the function operands representing a table column.
static std::recursive_mutex theirMutex
Definition UDFBase.h:388
virtual MArray< Double > getArrayDouble(const TableExprId &id)
virtual ~UDFBase()
Destructor.
Record itsAttributes
Definition UDFBase.h:378
static map< String, MakeUDFObject * > theirRegistry
Definition UDFBase.h:387
Bool isConstant() const
Tell if the UDF gives a constant result.
Definition UDFBase.h:351
void disableApplySelection()
Do not apply the selection.
Definition UDFBase.h:359
virtual String getString(const TableExprId &id)
static void registerUDF(const String &name, MakeUDFObject *func)
Register the name and construction function of a UDF (thread-safe).
virtual MArray< String > getArrayString(const TableExprId &id)
void setShape(const IPosition &shape)
Set the shape of the results if it is fixed and known.
virtual MArray< Bool > getArrayBool(const TableExprId &id)
virtual void setup(const Table &table, const TaQLStyle &)=0
Set up the function object.
this file contains all the compiler specific defines
Definition mainpage.dox:28
const Bool False
Definition aipstype.h:44
long long Int64
Define the extra non-standard types used by Casacore (like proposed uSize, Size)
Definition aipsxtype.h:38
int Int
Definition aipstype.h:50
bool Bool
Define the standard types used by Casacore.
Definition aipstype.h:42
double Double
Definition aipstype.h:55