casacore
tables
Tables.h
Go to the documentation of this file.
1
//# Tables.h: The Tables module - Casacore data storage
2
//# Copyright (C) 1994-2010
3
//# Associated Universities, Inc. Washington DC, USA.
4
//#
5
//# This library is free software; you can redistribute it and/or modify it
6
//# under the terms of the GNU Library General Public License as published by
7
//# the Free Software Foundation; either version 2 of the License, or (at your
8
//# option) any later version.
9
//#
10
//# This library is distributed in the hope that it will be useful, but WITHOUT
11
//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12
//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13
//# License for more details.
14
//#
15
//# You should have received a copy of the GNU Library General Public License
16
//# along with this library; if not, write to the Free Software Foundation,
17
//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18
//#
19
//# Correspondence concerning AIPS++ should be addressed as follows:
20
//# Internet email: aips2-request@nrao.edu.
21
//# Postal address: AIPS++ Project Office
22
//# National Radio Astronomy Observatory
23
//# 520 Edgemont Road
24
//# Charlottesville, VA 22903-2475 USA
25
//#
26
//# $Id$
27
28
#ifndef TABLES_TABLES_H
29
#define TABLES_TABLES_H
30
31
//# Includes
32
//# table description
33
#include <casacore/casa/aips.h>
34
#include <casacore/tables/Tables/TableDesc.h>
35
#include <casacore/tables/Tables/ColumnDesc.h>
36
#include <casacore/tables/Tables/ScaColDesc.h>
37
#include <casacore/tables/Tables/ArrColDesc.h>
38
#include <casacore/tables/Tables/ScaRecordColDesc.h>
39
40
//# table access
41
#include <casacore/tables/Tables/Table.h>
42
#include <casacore/tables/Tables/TableLock.h>
43
#include <casacore/tables/Tables/SetupNewTab.h>
44
#include <casacore/tables/Tables/ScalarColumn.h>
45
#include <casacore/tables/Tables/ArrayColumn.h>
46
#include <casacore/tables/Tables/TableRow.h>
47
#include <casacore/tables/Tables/TableCopy.h>
48
#include <casacore/casa/Arrays/Array.h>
49
#include <casacore/casa/Arrays/Slicer.h>
50
#include <casacore/casa/Arrays/Slice.h>
51
52
//# keywords
53
#include <casacore/tables/Tables/TableRecord.h>
54
#include <casacore/casa/Containers/RecordField.h>
55
56
//# table lookup
57
#include <casacore/tables/Tables/ColumnsIndex.h>
58
#include <casacore/tables/Tables/ColumnsIndexArray.h>
59
60
//# table vectors
61
#include <casacore/tables/Tables/TableVector.h>
62
#include <casacore/tables/Tables/TabVecMath.h>
63
#include <casacore/tables/Tables/TabVecLogic.h>
64
65
//# data managers
66
#include <casacore/tables/DataMan.h>
67
68
//# table expressions (for selection of rows)
69
#include <casacore/tables/TaQL.h>
70
71
72
namespace
casacore
{
//# NAMESPACE CASACORE - BEGIN
73
74
// <module>
75
76
// <summary>
77
// Tables are the data storage mechanism for Casacore
78
// </summary>
79
80
// <use visibility=export>
81
82
// <reviewed reviewer="jhorstko" date="1994/08/30" tests="" demos="">
83
// </reviewed>
84
85
// <prerequisite>
86
// <li> <linkto class="Record:description">Record</linkto> class
87
// </prerequisite>
88
89
// <etymology>
90
// "Table" is a formal term from relational database theory:
91
// <em> "The organizing principle in a relational database is the TABLE,
92
// a rectangular, row/column arrangement of data values."</em>
93
// Casacore tables are extensions to traditional tables, but are similar
94
// enough that we use the same name. There is also a strong resemblance
95
// between the uses of Casacore tables, and FITS binary tables, which
96
// provides another reason to use "Tables" to describe the Casacore data
97
// storage mechanism.
98
// </etymology>
99
100
// <synopsis>
101
// Tables are the fundamental storage mechanism for Casacore. This document
102
// explains <A HREF="#Tables:motivation">why</A> they had to be made,
103
// <A HREF="#Tables:properties">what</A> their properties are, and
104
// <A HREF="#Tables:open">how</A> to use them. The last subject is
105
// discussed and illustrated in a sequence of sections:
106
// <UL>
107
// <LI> <A HREF="#Tables:open">opening</A> an existing table,
108
// <LI> <A HREF="#Tables:read">reading</A> from a table,
109
// <LI> <A HREF="#Tables:creation">creating</A> a new table,
110
// <LI> <A HREF="#Tables:write">writing</A> into a table,
111
// <LI> <A HREF="#Tables:row-access">accessing rows</A> in a table,
112
// <LI> <A HREF="#Tables:select and sort">selection and sorting</A>
113
// (see also <A HREF="../notes/199.html">Table Query Language</A>),
114
// <LI> <A HREF="#Tables:concatenation">concatenating similar tables</A>
115
// <LI> <A HREF="#Tables:iterate">iterating</A> through a table,
116
// <LI> <A HREF="#Tables:LockSync">locking/synchronization</A>
117
// for concurrent access,
118
// <LI> <A HREF="#Tables:KeyLookup">indexing</A> a table for faster lookup,
119
// <LI> <A HREF="#Tables:vectors">vector operations</A> on a column.
120
// <LI> <A HREF="#Tables:performance">performance and robustness</A>
121
// considerations with some information on
122
// <A HREF="#Tables:iotracing">IO tracing</A>.
123
// </UL>
124
// A few <A HREF="#Tables:applications">applications</A> exist to inspect
125
// and manipulate a table.
126
127
128
// <ANCHOR NAME="Tables:motivation">
129
// <motivation></ANCHOR>
130
//
131
// The Casacore tables are mainly based upon the ideas of Allen Farris,
132
// as laid out in the
133
// <A HREF="http://aips2.cv.nrao.edu/aips++/docs/reference/Database.ps.gz">
134
// AIPS++ Database document</A>, from where the following paragraph is taken:
135
//
136
// <p>
137
// Traditional relational database tables have two features that
138
// decisively limit their applicability to scientific data. First, an item of
139
// data in a column of a table must be atomic -- it must have no internal
140
// structure. A consequence of this restriction is that relational
141
// databases are unable to deal with arrays of data items. Second, an
142
// item of data in a column of a table must not have any direct or
143
// implied linkages to other items of data or data aggregates. This
144
// restriction makes it difficult to model complex relationships between
145
// collections of data. While these restrictions may make it easy to
146
// define a mathematically complete set of data manipulation operations,
147
// they are simply intolerable in a scientific data-handling context.
148
// Multi-dimensional arrays are frequently the most natural modes in
149
// which to discuss and think about scientific data. In addition,
150
// scientific data often requires complex calibration operations that
151
// must draw on large bodies of data about equipment and its performance
152
// in various states. The restrictions imposed by the relational model
153
// make it very difficult to deal with complex problems of this nature.
154
// <p>
155
//
156
// In response to these limitations, and other needs, the Casacore tables were
157
// designed.
158
// </motivation>
159
160
// <ANCHOR NAME="Tables:properties">
161
// <h3>Table Properties</h3></ANCHOR>
162
//
163
// Casacore tables have the following properties:
164
// <ul>
165
// <li> A table consists of a number of rows and columns.
166
// <A HREF="#Tables:keywords">Keyword/value pairs</A> may be defined
167
// for the table as a whole and for individual columns. A keyword/value
168
// pair for a column could, for instance, define its unit.
169
// <li> Each table has a <A HREF="#Tables:Table Description">description</A>
170
// which specifies the number and type of columns, and maybe initial
171
// keyword sets and default values for the columns.
172
// <li> A cell in a column may contain
173
// <UL>
174
// <LI> a scalar;
175
// <LI> a "direct" array -- which must have the same shape in all
176
// cells of a column, is usually small, and is stored in the
177
// table itself;
178
// <LI> an "indirect" array -- which may have different shapes in
179
// different cells of the same column, is arbitrarily large,
180
// and is stored in a separate file.
181
// </UL>
182
// <li> A column may be
183
// <UL>
184
// <LI> "filled" -- containing actual data, or
185
// <LI> "virtual" -- containing a recipe telling how the data will
186
// be generated dynamically
187
// </UL>
188
// <li> Only the standard Casacore data types can be used in filled
189
// columns, be they scalars or arrays: Bool, uChar, Short, uShort,
190
// Int, uInt, float, double, Complex, DComplex and String.
191
// Furthermore scalars containing
192
// <linkto class=TableRecord>record</linkto> values are possible.
193
// <li> A column can have a default value, which will automatically be stored
194
// in a cell of the column, when a row is added to the table.
195
// <li> <A HREF="#Tables:Data Managers">Data managers</A> handle the
196
// reading, writing and generation of data. Each column in a table can
197
// be assigned its own data manager, which allows for optimization of
198
// the data storage per column. The choice of data manager determines
199
// whether a column is filled or virtual.
200
// <li> Table data are stored in a canonical format, so they can be read
201
// on any machine. To avoid needless swapping of bytes, the data can
202
// be stored in big endian (as used on e.g. SUN) or little endian
203
// (as used on Intel PC-s) canonical format.
204
// By default it uses the format specified in the aipsrc variable
205
// <code>table.endianformat</code> which defaults to
206
// <code>Table::LocalEndian</code> (thus the endian format of the
207
// machine being used).
208
// <li> The SQL-like
209
// <a href="../notes/199.html">Table Query Language</a> (TaQL)
210
// can be used to do operations on tables like
211
// select, sort, update, insert, delete, and create.
212
// </ul>
213
//
214
// Tables can be in one of three forms:
215
// <ul>
216
// <li> A plain table is a table stored on disk.
217
// It can be shared by multiple processes.
218
// <li> A memory table is a table held in memory.
219
// It is a process specific table, thus not sharable.
220
// The <linkto class=Table>Table::copy</linkto> function can be used
221
// to turn a memory table into a plain table.
222
// <li> A reference table is a table referencing a plain or memory table.
223
// It is the result of a selection or sort on another table.
224
// A reference table references the data in the other table, thus
225
// changing data in a reference table means that the data in the
226
// original table are changed.
227
// The <linkto class=Table>Table::deepCopy</linkto> function can be
228
// used to turn a reference table into a plain table.
229
// </ul>
230
// Concurrent access from different processes to the same plain table is
231
// fully supported by means of a <A HREF="#Tables:LockSync">
232
// locking/synchronization</A> mechanism. Concurrent access over NFS is also
233
// supported.
234
// <p>
235
// A (somewhat primitive) mechanism is available to do a
236
// <A HREF="#Tables:KeyLookup">table lookup</A> based on the contents
237
// of a key. In the future this might be replaced by a proper B+-tree index
238
// mechanism.
239
240
// <ANCHOR NAME="Tables:open">
241
// <h3>Opening an Existing Table</h3></ANCHOR>
242
//
243
// To open an existing table you just create a
244
// <linkto class="Table:description">Table</linkto> object giving
245
// the name of the table, like:
246
//
247
// <srcblock>
248
// Table readonly_table ("tableName");
249
// // or
250
// Table read_and_write_table ("tableName", Table::Update);
251
// </srcblock>
252
//
253
// The constructor option determines whether the table will be opened as
254
// readonly or as read/write. A readonly table file must be opened
255
// as readonly, otherwise an exception is thrown. The function
256
// <linkto class="Table">Table::isWritable(...)</linkto>
257
// can be used to determine if a table is writable.
258
//
259
// When the table is opened, the data managers are reinstantiated
260
// according to their definition at table creation.
261
262
// <ANCHOR NAME="Tables:read">
263
// <h3>Reading from a Table</h3></ANCHOR>
264
//
265
// You can read data from a table column with the "get" functions
266
// in the classes
267
// <linkto class="ScalarColumn:description">ScalarColumn<T></linkto>
268
// and
269
// <linkto class="ArrayColumn:description">ArrayColumn<T></linkto>.
270
// For scalars of a standard data type (i.e. Bool, uChar, Int, Short,
271
// uShort, uInt, float, double, Complex, DComplex and String) you could
272
// instead use
273
// <linkto class="TableColumn">TableColumn::getScalar(...)</linkto> or
274
// <linkto class="TableColumn">TableColumn::asXXX(...)</linkto>.
275
// These functions offer an extra: they do automatic data type promotion;
276
// so that you can, for example, get a double value from a float column.
277
//
278
// These "get" functions are used in the same way as the simple "put"
279
// functions described in the section on <A HREF="#Tables:write">writing</A> into a table.
280
// <p>
281
// <linkto class="ScalarColumn:description">ScalarColumn<T></linkto>
282
// can be constructed for a non-writable column. However, an exception
283
// is thrown if the put function is used for it.
284
// The same is true for
285
// <linkto class="ArrayColumn:description">ArrayColumn<T></linkto> and
286
// <linkto class="TableColumn:description">TableColumn</linkto>.
287
// <p>
288
// A typical program could look like:
289
// <srcblock>
290
// #include <casacore/tables/Tables/Table.h>
291
// #include <casacore/tables/Tables/ScalarColumn.h>
292
// #include <casacore/tables/Tables/ArrayColumn.h>
293
// #include <casacore/casa/Arrays/Vector.h>
294
// #include <casacore/casa/Arrays/Slicer.h>
295
// #include <casacore/casa/Arrays/ArrayMath.h>
296
// #include <iostream>
297
//
298
// main()
299
// {
300
// // Open the table (readonly).
301
// Table tab ("some.name");
302
//
303
// // Construct the various column objects.
304
// // Their data type has to match the data type in the table description.
305
// ScalarColumn<Int> acCol (tab, "ac");
306
// ArrayColumn<Float> arr2Col (tab, "arr2");
307
//
308
// // Loop through all rows in the table.
309
// uInt nrow = tab.nrow();
310
// for (uInt i=0; i<nrow; i++) {
311
// // Read the row for both columns.
312
// cout << "Column ac in row i = " << acCol(i) << endl;
313
// Array<Float> array = arr2Col.get (i);
314
// }
315
//
316
// // Show the entire column ac,
317
// // and show the 10th element of arr2 in each row.
318
// cout << acCol.getColumn();
319
// cout << arr2Col.getColumn (Slicer(Slice(10)));
320
// }
321
// </srcblock>
322
323
// <ANCHOR NAME="Tables:creation">
324
// <h3>Creating a Table</h3></ANCHOR>
325
//
326
// The creation of a table is a multi-step process:
327
// <ol>
328
// <li>
329
// Create a <A HREF="#Tables:Table Description">table description</A>.
330
// <li>
331
// Create a <linkto class="SetupNewTable:description">SetupNewTable</linkto>
332
// object with the name of the new table.
333
// <li>
334
// Create the necessary <A HREF="#Tables:Data Managers">data managers</A>.
335
// <li>
336
// Bind each column to the appropriate data manager.
337
// The system will bind unbound columns to data managers which
338
// are created internally using the default data manager name
339
// defined in the column description.
340
// <li>
341
// Define the shape of direct columns (if that was not already done in the
342
// column description).
343
// <li>
344
// Create the <linkto class="Table:description">Table</linkto>
345
// object from the SetupNewTable object. Here, a final check is performed
346
// and the necessary files are created.
347
// </ol>
348
// The recipe above is meant for the creation of a plain table, but the
349
// creation of a memory table is exactly the same. The only difference
350
// is that in the call to construct the Table object the Table::Memory
351
// type has to be given. Note that in the SetupNewTable object the columns
352
// can be bound to any data manager. <src>MemoryTable</src> will rebind
353
// stored columns to the <linkto class=MemoryStMan>MemoryStMan</linkto>
354
// storage manager, but virtual column bindings are not changed.
355
356
//
357
// The following example shows how you can create a table. An example
358
// specifically illustrating the creation of the
359
// <A HREF="#Tables:Table Description">table description</A> is given
360
// in that section. Other sections discuss the access to the table.
361
//
362
// <srcblock>
363
// #include <casacore/tables/Tables/TableDesc.h>
364
// #include <casacore/tables/Tables/SetupNewTab.h>
365
// #include <casacore/tables/Tables/Table.h>
366
// #include <casacore/tables/Tables/ScaColDesc.h>
367
// #include <casacore/tables/Tables/ScaRecordColDesc.h>
368
// #include <casacore/tables/Tables/ArrColDesc.h>
369
// #include <casacore/tables/DataMan/StandardStMan.h>
370
// #include <casacore/tables/DataMan/IncrementalStMan.h>
371
//
372
// main()
373
// {
374
// // Step1 -- Build the table description.
375
// TableDesc td("tTableDesc", "1", TableDesc::Scratch);
376
// td.comment() = "A test of class SetupNewTable";
377
// td.addColumn (ScalarColumnDesc<Int> ("ab" ,"Comment for column ab"));
378
// td.addColumn (ScalarColumnDesc<Int> ("ac"));
379
// td.addColumn (ScalarColumnDesc<uInt> ("ad","comment for ad"));
380
// td.addColumn (ScalarColumnDesc<Float> ("ae"));
381
// td.addColumn (ScalarRecordColumnDesc ("arec"));
382
// td.addColumn (ArrayColumnDesc<Float> ("arr1",3,ColumnDesc::Direct));
383
// td.addColumn (ArrayColumnDesc<Float> ("arr2",0));
384
// td.addColumn (ArrayColumnDesc<Float> ("arr3",0,ColumnDesc::Direct));
385
//
386
// // Step 2 -- Setup a new table from the description.
387
// SetupNewTable newtab("newtab.data", td, Table::New);
388
//
389
// // Step 3 -- Create storage managers for it.
390
// StandardStMan stmanStand_1;
391
// StandardStMan stmanStand_2;
392
// IncrementalStMan stmanIncr;
393
//
394
// // Step 4 -- First, bind all columns to the first storage
395
// // manager. Then, bind a few columns to another storage manager
396
// // (which will overwrite the previous bindings).
397
// newtab.bindAll (stmanStand_1);
398
// newtab.bindColumn ("ab", stmanStand_2);
399
// newtab.bindColumn ("ae", stmanIncr);
400
// newtab.bindColumn ("arr3", stmanIncr);
401
//
402
// // Step 5 -- Define the shape of the direct columns.
403
// // (this could have been done in the column description).
404
// newtab.setShapeColumn( "arr1", IPosition(3,2,3,4));
405
// newtab.setShapeColumn( "arr3", IPosition(3,3,4,5));
406
//
407
// // Step 6 -- Finally, create the table consisting of 10 rows.
408
// Table tab(newtab, 10);
409
//
410
// // Now we can fill the table, which is shown in a next section.
411
// // The Table destructor will flush the table to the files.
412
// }
413
// </srcblock>
414
// To create a table in memory, only step 6 has to be modified slightly to:
415
// <srcblock>
416
// Table tab(newtab, Table::Memory, 10);
417
// </srcblock>
418
419
// <ANCHOR NAME="Tables:write">
420
// <h3>Writing into a Table</h3></ANCHOR>
421
//
422
// Once a table has been created or has been opened for read/write,
423
// you want to write data into it. Before doing that you may have
424
// to add one or more rows to the table.
425
// <note role=tip> If a table was created with a given number of rows, you
426
// do not need to add rows; you may not even be able to do so.
427
// </note>
428
//
429
// When adding new rows to the table, either via the
430
// <linkto class="Table">Table(...) constructor</linkto>
431
// or via the
432
// <linkto class="Table">Table::addRow(...)</linkto>
433
// function, you can choose to have those rows initialized with the
434
// default values given in the description.
435
//
436
// To actually write the data into the table you need the classes
437
// <linkto class="ScalarColumn:description">ScalarColumn<T></linkto> and
438
// <linkto class="ArrayColumn:description">ArrayColumn<T></linkto>.
439
// For each column you can construct one or
440
// more of these objects. Their put(...) functions
441
// let you write a value at a time or the entire column in one go.
442
// For arrays you can "put" subsections of the arrays.
443
//
444
// As an alternative for scalars of a standard data type (i.e. Bool,
445
// uChar, Int, Short, uShort, uInt, float, double, Complex, DComplex
446
// and String) you could use the functions
447
// <linkto class="TableColumn">TableColumn::putScalar(...)</linkto>.
448
// These functions offer an extra: automatic data type promotion; so that
449
// you can, for example, put a float value in a double column.
450
//
451
// A typical program could look like:
452
// <srcblock>
453
// #include <casacore/tables/Tables/TableDesc.h>
454
// #include <casacore/tables/Tables/SetupNewTab.h>
455
// #include <casacore/tables/Tables/Table.h>
456
// #include <casacore/tables/Tables/ScaColDesc.h>
457
// #include <casacore/tables/Tables/ArrColDesc.h>
458
// #include <casacore/tables/Tables/ScalarColumn.h>
459
// #include <casacore/tables/Tables/ArrayColumn.h>
460
// #include <casacore/casa/Arrays/Vector.h>
461
// #include <casacore/casa/Arrays/Slicer.h>
462
// #include <casacore/casa/Arrays/ArrayMath.h>
463
// #include <iostream>
464
//
465
// main()
466
// {
467
// // First build the table description.
468
// TableDesc td("tTableDesc", "1", TableDesc::Scratch);
469
// td.comment() = "A test of class SetupNewTable";
470
// td.addColumn (ScalarColumnDesc<Int> ("ac"));
471
// td.addColumn (ArrayColumnDesc<Float> ("arr2",0));
472
//
473
// // Setup a new table from the description,
474
// // and create the (still empty) table.
475
// // Note that since we do not explicitly bind columns to
476
// // data managers, all columns will be bound to the default
477
// // standard storage manager StandardStMan.
478
// SetupNewTable newtab("newtab.data", td, Table::New);
479
// Table tab(newtab);
480
//
481
// // Construct the various column objects.
482
// // Their data type has to match the data type in the description.
483
// ScalarColumn<Int> ac (tab, "ac");
484
// ArrayColumn<Float> arr2 (tab, "arr2");
485
// Vector<Float> vec2(100);
486
//
487
// // Write the data into the columns.
488
// // In each cell arr2 will be a vector of length 100.
489
// // Since its shape is not set explicitly, it is done implicitly.
490
// for (uInt i=0; i<10; i++) {
491
// tab.addRow(); // First add a row.
492
// ac.put (i, i+10); // value is i+10 in row i
493
// indgen (vec2, float(i+20)); // vec2 gets i+20, i+21, ..., i+119
494
// arr2.put (i, vec2);
495
// }
496
//
497
// // Finally, show the entire column ac,
498
// // and show the 10th element of arr2.
499
// cout << ac.getColumn();
500
// cout << arr2.getColumn (Slicer(Slice(10)));
501
//
502
// // The Table destructor writes the table.
503
// }
504
// </srcblock>
505
//
506
// In this example we added rows in the for loop, but we could also have
507
// created 10 rows straightaway by constructing the Table object as:
508
// <srcblock>
509
// Table tab(newtab, 10);
510
// </srcblock>
511
// in which case we would not include
512
// <srcblock>
513
// tab.addRow()
514
// </srcblock>
515
//
516
// The classes
517
// <linkto class="TableColumn:description">TableColumn</linkto>,
518
// <linkto class="ScalarColumn:description">ScalarColumn<T></linkto>, and
519
// <linkto class="ArrayColumn:description">ArrayColumn<T></linkto>
520
// contain several functions to put values into a single cell or into the
521
// whole column. This may look confusing, but is actually quite simple.
522
// The functions can be divided in two groups:
523
// <ol>
524
// <li>
525
// Put the given value into the column cell(s).
526
// <ul>
527
// <li>
528
// The simplest put functions,
529
// <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto> and
530
// <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>,
531
// put a value into the given column cell. For convenience, there is an
532
// <linkto class="ArrayColumn">ArrayColumn::putSlice(...)</linkto>
533
// to put only a part of the array.
534
// <li>
535
// <linkto class="ScalarColumn">ScalarColumn::fillColumn(...)</linkto> and
536
// <linkto class="ArrayColumn">ArrayColumn::fillColumn(...)</linkto>
537
// fill an entire column by putting the given value into all the cells
538
// of the column.
539
// <li>
540
// The simplest putColumn functions,
541
// <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto> and
542
// <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>,
543
// put an array of values into the column. There is a special
544
// <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>
545
// version which puts only a part of the arrays.
546
// </ul>
547
//
548
// <li>
549
// Copy values from another column to this column.<BR>
550
// These functions have the advantage that the
551
// data type of the input and/or output column can be unknown.
552
// The generic TableColumn objects can be used for this purpose.
553
// The put(Column) function checks the data types and, if possible,
554
// converts them. If the conversion is not possible, it throws an
555
// exception.
556
// <ul>
557
// <li>
558
// The put functions copy the value in a cell of the input column
559
// to a cell in the output column. The row numbers of the cells
560
// in the columns can be different.
561
// <li>
562
// The putColumn functions copy the entire contents of the input column
563
// to the output column. The lengths of the columns must be equal.
564
// </ul>
565
// Each class has its own set of these functions.
566
// <ul>
567
// <li>
568
// <linkto class="TableColumn">TableColumn::put(...)</linkto> and
569
// <linkto class="TableColumn">TableColumn::putColumn(...)</linkto>
570
// are the most generic. They can be
571
// used if the data types of both input and output column are unknown.
572
// Note that these functions are virtual.
573
// <li>
574
// <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto>,
575
// <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>,
576
// <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto>, and
577
// <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>
578
// are less generic and therefore potentially more efficient.
579
// The most efficient variants are the ones taking a
580
// Scalar/ArrayColumn<T>, because they require no data type
581
// conversion.
582
// </ul>
583
// </ol>
584
585
// <ANCHOR NAME="Tables:row-access">
586
// <h3>Accessing rows in a Table</h3></ANCHOR>
587
//
588
// Apart from accessing a table column-wise as described in the
589
// previous two sections, it is also possible to access a table row-wise.
590
// The <linkto class=TableRow>TableRow</linkto> class makes it possible
591
// to access multiple fields in a table row as a whole. Note that like the
592
// XXColumn classes described above, there is also an ROTableRow class
593
// for access to readonly tables.
594
// <p>
595
// On construction of a TableRow object it has to be specified which
596
// fields (i.e. columns) are part of the row. For these fields a
597
// fixed structured <linkto class=TableRecord>TableRecord</linkto>
598
// object is constructed as part of the TableRow object. The TableRow::get
599
// function will fill this record with the table data for the given row.
600
// The user has access to the record and can use
601
// <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> objects for
602
// speedier access to the record.
603
// <p>
604
// The class could be used as shown in the following example.
605
// <srcblock>
606
// // Open the table as readonly and define a row object to contain
607
// // the given columns.
608
// // Note that the function stringToVector is a very convenient
609
// // way to construct a Vector<String>.
610
// // Show the description of the fields in the row.
611
// Table table("Some.table");
612
// ROTableRow row (table, stringToVector("col1,col2,col3"));
613
// cout << row.record().description();
614
// // Since the structure of the record is known, the RecordFieldPtr
615
// // objects could be used to allow for easy and fast access to
616
// // the record which is refilled for each get.
617
// RORecordFieldPtr<String> col1(row.record(), "col1");
618
// RORecordFieldPtr<Double> col2(row.record(), "col2");
619
// RORecordFieldPtr<Array<Int> > col3(row.record(), "col3");
620
// for (uInt i=0; i<table.nrow(); i++) {
621
// row.get (i);
622
// someString = *col1;
623
// somedouble = *col2;
624
// someArrayInt = *col3;
625
// }
626
// </srcblock>
627
// The description of TableRow contains some more extensive examples.
628
629
// <ANCHOR NAME="Tables:select and sort">
630
// <h3>Table Selection and Sorting</h3></ANCHOR>
631
//
632
// The result of a select and sort of a table is another table,
633
// which references the original table. This means that an update
634
// of a sorted or selected table results in the update of the original
635
// table. The result is, however, a table in itself, so all table
636
// functions (including select and sort) can be used with it.
637
// Note that a true copy of such a reference table can be made with
638
// the <linkto class=Table>Table::deepCopy</linkto> function.
639
// <p>
640
// Rows or columns can be selected from a table. Columns can be selected
641
// by the
642
// <linkto class="Table">Table::project(...)</linkto>
643
// function, while rows can be selected by the various
644
// <linkto class="Table">Table operator()</linkto> functions.
645
// Usually a row is selected by giving a select expression with
646
// <linkto class="TableExprNode:description">TableExprNode</linkto>
647
// objects. These objects represent the various nodes
648
// in an expression, e.g. a constant, a column, or a subexpression.
649
// The Table function
650
// <linkto class="Table">Table::col(...)</linkto>
651
// creates a TableExprNode object for a column. The function
652
// <linkto class="Table">Table::key(...)</linkto>
653
// does the same for a keyword by reading
654
// the keyword value and storing it as a constant in an expression node.
655
// All column nodes in an expression must belong to the same table,
656
// otherwise an exception is thrown.
657
// In the following example we select all rows with RA>10:
658
// <srcblock>
659
// #include <casacore/tables/TaQL/ExprNode.h>
660
// Table table ("Table.name");
661
// Table result = table (table.col("RA") > 10);
662
// </srcblock>
663
// while in the next one we select rows with RA and DEC in the given
664
// intervals:
665
// <srcblock>
666
// Table result = table (table.col("RA") > 10
667
// && table.col("RA") < 14
668
// && table.col("DEC") >= -10
669
// && table.col("DEC") <= 10);
670
// </srcblock>
671
// The following operators can be used to form arbitrarily
672
// complex expressions:
673
// <ul>
674
// <li> Relational operators ==, !=, >, >=, < and <=.
675
// <li> Logical operators &&, || and !.
676
// <li> Arithmetic operators +, -, *, /, %, and unary + and -.
677
// <li> Bit operators ^, &, |, and unary ~.
678
// <li> Operator() to take a subsection of an array.
679
// </ul>
680
// Many functions (like sin, max, conj) can be used in an expression.
681
// Class <linkto class=TableExprNode>TableExprNode</linkto> shows
682
// the available functions.
683
// E.g.
684
// <srcblock>
685
// Table result = table (sin (table.col("RA")) > 0.5);
686
// </srcblock>
687
// Function <src>in</src> can be used to select from a set of values.
688
// A value set can be constructed using class
689
// <linkto class=TableExprNodeSet>TableExprNodeSet</linkto>.
690
// <srcblock>
691
// TableExprNodeSet set;
692
// set.add (TableExprNodeSetElem ("abc"));
693
// set.add (TableExprNodeSetElem ("defg"));
694
// set.add (TableExprNodeSetElem ("h"));
695
// Table result = table (table.col("NAME").in (set));
696
// </srcblock>
697
// This selects rows with a NAME equal to <src>abc</src>,
698
// <src>defg</src>, or <src>h</src>.
699
//
700
// <p>
701
// You can sort a table on one or more columns containing scalars.
702
// In this example we simply sort on column RA (default is ascending):
703
// <srcblock>
704
// Table table ("Table.name");
705
// Table result = table.sort ("RA");
706
// </srcblock>
707
// Multiple
708
// <linkto class="Table">Table::sort(...)</linkto>
709
// functions exist which allow for more flexible control over the sort order.
710
// In the next example we sort first on RA in descending order
711
// and then on DEC in ascending order:
712
// <srcblock>
713
// Table table ("Table.name");
714
// Block<String> sortKeys(2);
715
// Block<int> sortOrders(2);
716
// sortKeys(0) = "RA";
717
// sortOrders(0) = Sort::Descending;
718
// sortKeys(1) = "DEC";
719
// sortOrders(1) = Sort::Ascending;
720
// Table result = table.sort (sortKeys, sortOrders);
721
// </srcblock>
722
//
723
// Tables stemming from the same root can be combined in several
724
// ways with the help of the various logical
725
// <linkto class="Table">Table operators</linkto> (operator|, etc.).
726
727
// <h4>Table Query Language</h4>
728
// The selection and sorting mechanism described above can only be used
729
// in a hard-coded way in a C++ program.
730
// There is, however, another way. Strings containing selection and
731
// sorting commands can be used.
732
// The syntax of these commands is based on SQL and is described in the
733
// <a href="../notes/199.html">Table Query Language</a> (TaQL) note 199.
734
// The language supports UDFs (User Defined Functions) in dynamically
735
// loadable libraries as explained in the note.
736
// <br>A TaQL command can be executed with the static function
737
// <src>tableCommand</src> defined in class
738
// <linkto class=TableParse>TableParse</linkto>.
739
740
// <ANCHOR NAME="Tables:concatenation">
741
// <h3>Table Concatenation</h3></ANCHOR>
742
// Tables with identical descriptions can be concatenated in a virtual way
743
// using the Table concatenation constructor. Such a Table object behaves
744
// as any other Table object, thus any operation can be performed on it.
745
// An identical description means that the number of columns, the column names,
746
// and the data types of the columns must be the same. The columns do not
747
// need to be ordered in the same way nor to be stored in the same way.
748
// <br>Note that if tables have different column names, it is possible
749
// to form a projection (as described in the previous section) first
750
// to make them appear identical.
751
//
752
// Sometimes a MeasurementSet is partitioned, for instance in chunks of
753
// one hour. All those chunks can be virtually concatenated this way.
754
// Note that all tables in the concatenation will be opened, thus one might
755
// run out of file descriptors if there are many chunks.
756
//
757
// Similar to reference tables, it is possible to make a concatenated Table
758
// persistent by using the <src>rename</src> function. It will not copy the
759
// data; only the names of the tables used are written.
760
//
761
// The keywords of a concatenated table are taken from the first table.
762
// It is possible to change or add keywords, but that is not persistent,
763
// not even if the concatenated table is made persistent.
764
// <br>The keywords holding subtables can be handled in a special way.
765
// Normally the subtables of the concatenation are the subtables of the first
766
// table, but it is possible to concatenate subtables as well by
767
// giving their names in the constructor.
768
// In this way the, say, SYSCAL subtable of a MeasurementSet can be
769
// concatenated as well.
770
// <srcblock>
771
// // Create virtual concatenation of ms0 and ms1.
772
// Block<String> names(2);
773
// names[0] = "ms0";
774
// names[1] = "ms1";
775
// // Also concatenate their SYSCAL subtables.
776
// Block<String> subNames(1, "SYSCAL");
777
// Table concTab (names, subNames);
778
// </srcblock>
779
780
// <ANCHOR NAME="Tables:iterate">
781
// <h3>Table Iterators</h3></ANCHOR>
782
//
783
// You can iterate through a table in an arbitrary order by getting
784
// a subset of the table consisting of the rows in which the iteration
785
// columns have the same value.
786
// An iterator object is created by constructing a
787
// <linkto class="TableIterator:description">TableIterator</linkto>
788
// object with the appropriate column names.
789
//
790
// In the next example we define an iteration on the columns Time and
791
// Baseline. Each iteration step returns a table subset in which Time and
792
// Baseline have the same value.
793
//
794
// <srcblock>
795
// // Iterate over Time and Baseline (by default in ascending order).
796
// // Time is the main iteration order, thus the first column specified.
797
// Table t;
798
// Table tab ("UV_Table.data");
799
// Block<String> iv0(2);
800
// iv0[0] = "Time";
801
// iv0[1] = "Baseline";
802
// //
803
// // Create the iterator. This will prepare the first subtable.
804
// TableIterator iter(tab, iv0);
805
// Int nr = 0;
806
// while (!iter.pastEnd()) {
807
// // Get the current subtable.
808
// // This will contain rows with equal Time and Baseline.
809
// t = iter.table();
810
// cout << t.nrow() << " ";
811
// nr++;
812
// // Prepare the next subtable with the next Time,Baseline value.
813
// iter.next();
814
// }
815
// cout << endl << nr << " iteration steps" << endl;
816
// </srcblock>
817
//
818
// You can define more than one iterator on the same table; they operate
819
// independently.
820
//
821
// Note that the result of each iteration step is a table in itself which
822
// references the original table, just as in the case of a sort or select.
823
// This means that the resulting table can be used again in a sort, select,
824
// iteration, etc.
825
826
// <ANCHOR NAME="Tables:vectors">
827
// <h3>Table Vectors</h3></ANCHOR>
828
//
829
// A table vector makes it possible to treat a column in a table
830
// as a vector. Almost all operators and functions defined for normal
831
// vectors, are also defined for table vectors. So it is, for instance,
832
// possible to add a constant to a table vector. This has the effect
833
// that the underlying column gets changed.
834
//
835
// You can use the templated class
836
// <linkto class="TableVector:description">TableVector</linkto>
837
// to make a scalar column appear as a (table) vector.
838
// Columns containing arrays or tables are not supported.
839
// The data type of the TableVector object must match the
840
// data type of the column.
841
// A table vector can also hold a normal vector so that (temporary)
842
// results of table vector operations can be handled.
843
//
844
// In the following example we double the data in column COL1 and
845
// store the result in a temporary table vector.
846
// <srcblock>
847
// // Create a table vector for column COL1.
848
// // Note that if the table is readonly, putting data in the table vector
849
// // results in an exception.
850
// Table tab ("Table.data");
851
// TableVector<Int> tabvec(tab, "COL1");
852
// // Multiply it by a constant. Result is kept in a Vector in memory.
853
// TableVector<Int> temp = 2 * tabvec;
854
// </srcblock>
855
//
856
// In the next example we double the data in COL1 and put the result back
857
// in the column.
858
// <srcblock>
859
// // Create a table vector for column COL1.
860
// // The table has to be opened for update to be able to change the column.
861
// Table tab ("Table.data", Table::Update);
862
// TableVector<Int> tabvec(tab, "COL1");
863
// // Multiply it by a constant.
864
// tabvec *= 2;
865
// </srcblock>
866
867
// <ANCHOR NAME="Tables:keywords">
868
// <h3>Table Keywords</h3></ANCHOR>
869
//
870
// Any number of keyword/value pairs may be attached to the table as a whole,
871
// or to any individual column. They may be freely added, retrieved,
872
// re-assigned, or deleted. They are, in essence, a self-resizing list of
873
// values (any of the primitive types) indexed by Strings (the keyword).
874
//
875
// A table keyword/value pair might be
876
// <srcblock>
877
// Observer = Grote Reber
878
// Date = 10 october 1942
879
// </srcblock>
880
// Column keyword/value pairs might be
881
// <srcblock>
882
// Units = mJy
883
// Reference Pixel = 320
884
// </srcblock>
885
// The class
886
// <linkto class="TableRecord:description">TableRecord</linkto>
887
// represents the keywords in a table.
888
// It is (indirectly) derived from the standard record classes in the class
889
// <linkto class="Record:description">Record</linkto>.
890
891
// <ANCHOR NAME="Tables:Table Description">
892
// <h3>Table Description</h3></ANCHOR>
893
//
894
// A table contains a description of itself, which defines the layout of the
895
// columns and the keyword sets for the table and for the individual columns.
896
// It may also define initial keyword sets and default values for the columns.
897
// Such a default value is automatically stored in a cell in the table column,
898
// whenever a row is added to the table.
899
//
900
// The creation of the table descriptor is the first step in the creation of
901
// a new table. The description is part of the table itself, but may also
902
// exist in a separate file. This is useful if you need to create a number
903
// of tables with the same structure; in other circumstances it probably
904
// should be avoided.
905
//
906
// The public classes to set up a table description are:
907
// <ul>
908
// <li> <linkto class="TableDesc:description">TableDesc</linkto>
909
// -- holds the table description.
910
// <li> <linkto class="ColumnDesc:description">ColumnDesc</linkto>
911
// -- holds a generic column description.
912
// <li> <linkto class="ScalarColumnDesc:description">ScalarColumnDesc<T>
913
// </linkto>
914
// -- defines a column containing a scalar value.
915
// <li> <linkto class="ScalarRecordColumnDesc:description">ScalarRecordColumnDesc
916
// </linkto>
917
// -- defines a column containing a scalar record value.
918
// <li> <linkto class="ArrayColumnDesc:description">ArrayColumnDesc<T>
919
// </linkto>
920
// -- defines a column containing an (in)direct array.
921
// </ul>
922
//
923
// Here follows a typical example of the construction of a table
924
// description. For more specialized things -- like the definition of a
925
// default data manager -- we refer to the descriptions of the above
926
// mentioned classes.
927
//
928
// <srcblock>
929
// #include <casacore/tables/Tables/TableDesc.h>
930
// #include <casacore/tables/Tables/ScaColDesc.h>
931
// #include <casacore/tables/Tables/ArrColDesc.h>
932
// #include <casacore/tables/Tables/ScaRecordColDesc.h>
933
// #include <casacore/tables/Tables/TableRecord.h>
934
// #include <casacore/casa/Arrays/IPosition.h>
935
// #include <casacore/casa/Arrays/Vector.h>
936
//
937
// main()
938
// {
939
// // Create a new table description
940
// // Define a comment for the table description.
941
// // Define some keywords.
942
// ColumnDesc colDesc1, colDesc2;
943
// TableDesc td("tTableDesc", "1", TableDesc::New);
944
// td.comment() = "A test of class TableDesc";
945
// td.rwKeywordSet().define ("ra", float(3.14));
946
// td.rwKeywordSet().define ("equinox", double(1950));
947
// td.rwKeywordSet().define ("aa", Int(1));
948
//
949
// // Define an integer column ab.
950
// td.addColumn (ScalarColumnDesc<Int> ("ab", "Comment for column ab"));
951
//
952
// // Add a scalar integer column ac, define keywords for it
953
// // and define a default value 0.
954
// // Overwrite the value of keyword unit.
955
// ScalarColumnDesc<Int> acColumn("ac");
956
// acColumn.rwKeywordSet().define ("scale", Complex(0,0));
957
// acColumn.rwKeywordSet().define ("unit", "");
958
// acColumn.setDefault (0);
959
// td.addColumn (acColumn);
960
// td.rwColumnDesc("ac").rwKeywordSet().define ("unit", "DEG");
961
//
962
// // Add a scalar string column ad and define its comment string.
963
// td.addColumn (ScalarColumnDesc<String> ("ad","comment for ad"));
964
//
965
// // Now define array columns.
966
// // This one is indirect and has no dimensionality mentioned yet.
967
// td.addColumn (ArrayColumnDesc<Complex> ("Arr1","comment for Arr1"));
968
// // This one is indirect and has 3-dim arrays.
969
// td.addColumn (ArrayColumnDesc<Int> ("Arr2","comment for Arr2",3));
970
// // This one is direct and has 2-dim arrays with axes length 4 and 7.
971
// td.addColumn (ArrayColumnDesc<uInt> ("Arr3","comment for Arr3",
972
// IPosition(2,4,7),
973
// ColumnDesc::Direct));
974
//
975
// // Add columns containing records.
976
// td.addColumn (ScalarRecordColumnDesc ("Rec1"));
977
// }
978
// </srcblock>
979
980
// <ANCHOR NAME="Tables:Data Managers">
981
// <h3>Data Managers</h3></ANCHOR>
982
//
983
// Data managers take care of the actual access to the data in a column.
984
// There are two kinds of data managers:
985
// <ol>
986
// <li> <A HREF="#Tables:storage managers">Storage managers</A> --
987
// which store the data as such. They can only handle the standard
988
// data types (Bool,...,String) as discussed in the section about the
989
// <A HREF="#Tables:properties">table properties</A>.
990
// <li> <A HREF="#Tables:virtual column engines">Virtual column engines</A>
991
// -- which manipulate the data.
992
// An engine could be a simple thing like scaling the data (as done
993
// in classic AIPS to reduce data storage), but it could also be an
994
// elaborate thing like applying corrections on-the-fly.
995
// <br>An engine must be used to store data objects with a non-standard type.
996
// It has to break down the object into items with standard data types
997
// which can be stored with a storage manager.
998
// </ol>
999
// In general the user of a table does not need to be aware which
1000
// data managers are being used underneath. Only when the table is created
1001
// data managers have to be bound to the columns. Thereafter it is
1002
// completely transparent.
1003
//
1004
// Data managers need to be registered, so they can be found when a table is
1005
// opened. All data managers mentioned below are part of the system and
1006
// pre-registered.
1007
// It is, however, also possible to load data managers on demand. If a data
1008
// manager is not registered, an attempt is made to load a shared library with the
1009
// part of the data manager name (in lowercase) before a dot or left arrow.
1010
// The dot makes it possible to have multiple data managers in a shared library,
1011
// while the left arrow is meant for templated data manager classes.
1012
// <br>E.g. if <src>BitFlagsEngine<uChar></src> was not registered, the shared
1013
// library <src>libbitflagsengine.so</src> (or .dylib) will be loaded. If
1014
// successful, its function <src>register_bitflagsengine()</src> will be
1015
// executed which should register the data manager(s). Thereafter it is known
1016
// and will be used. For example in a file Register.h and Register.cc:
1017
// <srcblock>
1018
// // Declare in .h file as C function, so no name mangling is done.
1019
// extern "C" {
1020
// void register_bitflagsengine();
1021
// }
1022
// // Implement in .cc file.
1023
// void register_bitflagsengine()
1024
// {
1025
// BitFlagsEngine<uChar>::registerClass();
1026
// BitFlagsEngine<Short>::registerClass();
1027
// BitFlagsEngine<Int>::registerClass();
1028
// }
1029
// </srcblock>
1030
// There are several functions that can give information which data managers
1031
// are used for which columns and to obtain the characteristics and properties
1032
// of them. Class RODataManAccessor and derived classes can be used for it
1033
// as well as the functions <src>dataManagerInfo</src> and
1034
// <src>showStructure</src> in class Table.
1035
1036
// <ANCHOR NAME="Tables:storage managers">
1037
// <h3>Storage Managers</h3></ANCHOR>
1038
//
1039
// Storage managers are used to store the data contained in the column cells.
1040
// At table construction time the binding of columns to storage managers is done.
1041
// <br>Each storage manager uses one or more files (usually called table.fi_xxx
1042
// where i is a sequence number and _xxx is some kind of extension).
1043
// Typically several files are used to store the data of the columns of a table.
1044
// <br>In order to reduce the number of files (and to support large block sizes),
1045
// it is possible to have a single container file (a MultiFile) containing all
1046
// data files used by the storage managers. Such a file is called table.mf.
1047
// Note that the program <em>lsmf</em> can be used to see which
1048
// files are contained in a MultiFile. The program <em>tomf</em> can
1049
// convert the files in a MultiFile to regular files.
1050
// <br>At table creation time it is decided if a MultiFile will be used. It
1051
// can be done by means of the StorageOption object given to the SetupNewTable
1052
// constructor and/or by the aipsrc variables:
1053
// <ul>
1054
// <li> <src>table.storage.option</src> which can have the value
1055
// 'multifile', 'sepfile' (meaning separate files), or 'default'.
1056
// Currently the default is to use separate files.
1057
// <li> <src>table.storage.blocksize</src> defines the block size to be
1058
// used by a MultiFile. If 0 is given, the file system's block size
1059
// will be used.
1060
// </ul>
1061
// Almost all standard storage managers support the MultiFile.
1062
// The exception is StManAipsIO, because it is hardly ever used.
1063
//
1064
// Several storage managers exist, each with its own storage characteristics.
1065
// The default and preferred storage manager is <src>StandardStMan</src>.
1066
// Other storage managers should only be used if they pay off in
1067
// file space (like <src>IncrementalStMan</src> for slowly varying data)
1068
// or access speed (like the tiled storage managers for large data arrays).
1069
// <br>The storage managers store the data in a big or little endian
1070
// canonical format. The format can be specified when the table is created.
1071
// By default it uses the endian format as specified in the aipsrc variable
1072
// <code>table.endianformat</code> which can have the value local, big,
1073
// or little. The default is local.
1074
// <ol>
1075
// <li>
1076
// <linkto class="StandardStMan:description">StandardStMan</linkto>
1077
// stores all the values in so-called buckets (equally sized chunks
1078
// in the file). It requires little memory.
1079
// <br>It replaces the old <src>StManAipsIO</src>.
1080
//
1081
// <li>
1082
// <linkto class="IncrementalStMan:description">IncrementalStMan</linkto>
1083
// uses a storage mechanism resembling "incremental backups". A value
1084
// is only stored if it is different from the previous row. It is
1085
// very well suited for slowly varying data.
1086
// <br>The class <linkto class="ROIncrementalStManAccessor:description">
1087
// ROIncrementalStManAccessor</linkto> can be used to tune the
1088
// behaviour of the <src>IncrementalStMan</src>. It contains functions
1089
// to deal with the cache size and to show the behaviour of the cache.
1090
//
1091
// <li>
1092
// The <a href="#Tables:TiledStMan">Tiled Storage Managers</a>
1093
// store the data as a tiled hypercube allowing for more or less equally
1094
// efficient data access along all main axes. It can be used for
1095
// UV-data as well as for image data.
1096
//
1097
// <li>
1098
// <linkto class="StManAipsIO:description">StManAipsIO</linkto>
1099
// uses <src>AipsIO</src> to store the data in the columns.
1100
// It supports all table functionality, but its I/O is probably not
1101
// as efficient as other storage managers. It also requires that
1102
// a large part of the table fits in memory.
1103
// <br>It should not be used anymore, because it uses a lot of memory
1104
// for larger tables and because it is not very robust in case an
1105
// application or system crashes.
1106
//
1107
// <li>
1108
// <linkto class="MemoryStMan:description">MemoryStMan</linkto>
1109
// holds the data in memory. It means that data 'stored' with this
1110
// storage manager are NOT persistent.
1111
// <br>This storage manager is primarily meant for tables held in
1112
// memory, but it can also be useful for temporary columns in
1113
// normal tables. Note, however, that if a table is accessed
1114
// concurrently from multiple processes, MemoryStMan data cannot be
1115
// synchronized.
1116
// </ol>
1117
//
1118
// The storage manager framework makes it possible to support arbitrary files
1119
// as tables. This has been used in a case where a file is filled
1120
// by the data acquisition system of a telescope. The file is simultaneously
1121
// used as a table using a dedicated storage manager. The table
1122
// system and storage manager provide a sync function to synchronize
1123
// the processes, i.e. to make the table system aware of changes
1124
// in the file size (thus in the table size) by the filling process.
1125
//
1126
// <note role=tip>
1127
// Not all data managers support all the table functionality. So, the choice
1128
// of a data manager can greatly influence the type of operations you can do
1129
// on the table as a whole.
1130
// For example, if a column uses the tiled storage manager,
1131
// it is not possible to delete rows from the table, because that storage
1132
// manager will not support deletion of rows.
1133
// However, it is always possible to delete all columns of a data
1134
// manager in one single call.
1135
// </note>
1136
1137
// <ANCHOR NAME="Tables:TiledStMan">
1138
// <h3>Tiled Storage Manager</h3></ANCHOR>
1139
// The Tiled Storage Managers allow one to store the data of
1140
// one or more columns in a tiled way. Tiling means
1141
// that the data are stored without a preferred order to make access
1142
// along the different main axes equally efficient. This is done by
1143
// storing the data in so-called tiles (i.e. equally shaped subsets of an
1144
// array) to increase data locality. The user can define the tile shape
1145
// to optimize for the most frequently used access.
1146
// <p>
1147
// The Tiled Storage Manager has the following properties:
1148
// <ul>
1149
// <li> There can be more than one Tiled Storage Manager in
1150
// a table; each with its own (unique) name.
1151
// <li> Each Tiled Storage Manager can store an
1152
// N-dimensional so-called hypercolumn.
1153
// Elaborate hypercolumns can be defined using
1154
// <linkto file="TableDesc.h#defineHypercolumn">
1155
// TableDesc::defineHypercolumn</linkto>.
1156
// <br>Note that defining a hypercolumn is only necessary if it
1157
// contains multiple columns or if the TiledDataStMan is used.
1158
// It means that in practice it is hardly ever needed to define a
1159
// hypercolumn.
1160
// <br>A hypercolumn consists of up to three types of columns:
1161
// <dl>
1162
// <dt> Data columns
1163
// <dd> contain the data to be stored in a tiled way. This will
1164
// be done in tiled hypercubes.
1165
// There must be at least one data column.
1166
// <br> For example: a table contains UV-data with
1167
// data columns "Visibility" and "Weight".
1168
// <dt> Coordinate columns
1169
// <dd> define the world coordinates of the pixels in the data columns.
1170
// Coordinate columns are optional, but if given there must
1171
// be N coordinate columns for an N-dimensional hypercolumn.
1172
// <br>
1173
// For example: the data in the example above is 4-dimensional
1174
// and has coordinate columns "Time", "Baseline", "Frequency",
1175
// and "Polarization".
1176
// <dt> Id columns
1177
// <dd> are needed if TiledDataStMan is used.
1178
// Different rows in the data columns can be stored in different
1179
// hypercubes. The values in the id column(s) uniquely identify
1180
// the hypercube a row is stored in.
1181
// <br>
1182
// For example: the line and continuum data in a MeasurementSet
1183
// table need to be stored in 2 different hypercubes (because
1184
// their shapes are different (see below)). A column containing
1185
// the type (line or continuum) has to be used as an id column.
1186
// </dl>
1187
// <li> If multiple data columns are used, the shape of their data
1188
// must be conforming in each individual row.
1189
// If data in different rows have different shapes, they must be
1190
// stored in different hypercubes, because a hypercube can only hold
1191
// data with conforming shapes.
1192
// <br>
1193
// Thus in the example above, rows with line data will have conforming
1194
// shapes and can be stored in one hypercube. The continuum data
1195
// will have another shape and can be stored in another hypercube.
1196
// <br>
1197
// The storage manager keeps track of the mapping of rows to/from
1198
// hypercubes.
1199
// <li> Each hypercube can be tiled in its own way. It is not required
1200
// that an integer number of tiles fits in the hypercube. The last
1201
// tiles will be padded as needed.
1202
// <li> The last axis of a hypercube can be extensible. This means that
1203
// the size of that axis does not need to be defined when the
1204
// hypercube is defined in the storage manager. Instead, the hypercube
1205
// can be extended when another chunk of data has to be stored.
1206
// This can be very useful in, for example, a (quasi-)realtime
1207
// environment where the size of the time axis is not known.
1208
// <li> If coordinate columns are defined, they describe the coordinates
1209
// of the axes of the hypercubes. Each hypercube has its own set of
1210
// coordinates.
1211
// <li> Data and id columns have to be stored with the Tiled
1212
// Storage Manager. However, coordinate columns do not need to be
1213
// stored with the Tiled Storage Manager.
1214
// Especially in the case where the coordinates for a hypercube axis
1215
// are varying (i.e. dependent on other axes), another storage manager
1216
// has to be used (because the Tiled Storage Manager can only
1217
// hold constant coordinates).
1218
// </ul>
1219
// <p>
1220
// The following Tiled Storage Managers are available:
1221
// <dl>
1222
// <dt> <linkto class=TiledShapeStMan:description>TiledShapeStMan</linkto>
1223
// <dd> can be seen as a specialization of <src>TiledDataStMan</src>
1224
// by using the array shape as the id value.
1225
// Similarly to <src>TiledDataStMan</src> it can maintain multiple
1226
// hypercubes and store multiple rows in a hypercube, but it is
1227
// easier to use, because the special <src>addHypercube</src> and
1228
// <src>extendHypercube</src> functions are not needed.
1229
// A hypercube is automatically added when a new array shape is
1230
// encountered.
1231
// <br>
1232
// This storage manager could be used for a table with a column
1233
// containing line and continuum data, which will result
1234
// in 2 hypercubes.
1235
// <dt> <linkto class=TiledCellStMan:description>TiledCellStMan</linkto>
1236
// <dd> creates (automatically) a new hypercube for each row.
1237
// Thus each row of the hypercolumn is stored in a separate hypercube.
1238
// Note that the row number serves as the id value. So an id column
1239
// is not needed, although there are multiple hypercubes.
1240
// <br>
1241
// This storage manager is meant for tables where the data arrays
1242
// in the different rows are not accessed together. One can think
1243
// of a column containing images. Each row contains an image and
1244
// only one image is shown at a time.
1245
// <dt> <linkto class=TiledColumnStMan:description>TiledColumnStMan</linkto>
1246
// <dd> creates one hypercube for the entire hypercolumn. Thus all cells
1247
// in the hypercube have to have the same shape and therefore this
1248
// storage manager is only possible if all columns in the hypercolumn
1249
// have the attribute FixedShape.
1250
// <br>
1251
// This storage manager could be used for a table with a column
1252
// containing images for the Stokes parameters I, Q, U, and V.
1253
// By storing them in one hypercube, it is possible to retrieve
1254
// the 4 Stokes values for a subset of the image or for an individual
1255
// pixel in a very efficient way.
1256
// <dt> <linkto class=TiledDataStMan:description>TiledDataStMan</linkto>
1257
// <dd> allows one to control the creation and extension of hypercubes.
1258
// This is done by means of the class
1259
// <linkto class=TiledDataStManAccessor:description>
1260
// TiledDataStManAccessor</linkto>.
1261
// It makes it possible to store, say, row 0-9 in hypercube A,
1262
// row 10-34 in hypercube B, row 35-54 in hypercube A again, etc.
1263
// <br>
1264
// The drawback of this storage manager is that its hypercubes are not
1265
// automatically extended when adding new rows. The special functions
1266
// <src>addHypercube</src> and <src>extendHypercube</src> have to be
1267
// used making it somewhat tedious to use.
1268
// Therefore this storage manager may become obsolete in the near future.
1269
// </dl>
1270
// The Tiled Storage Managers have 3 ways to access and cache the data.
1271
// Class <linkto class=TSMOption>TSMOption</linkto> can be used to setup an
1272
// access choice and use it in a Table constructor.
1273
// <ul>
1274
// <li> The old way (the only way until January 2010) uses a cache
1275
// of its own to keep tiles that might need to be reused. It will always
1276
// access entire tiles, even if only a small part is needed.
1277
// It is possible to define a maximum cache size. The description of class
1278
// <linkto class=ROTiledStManAccessor>ROTiledStManAccessor</linkto>
1279
// contains a discussion about the effect of defining a maximum cache
1280
// size.
1281
// <li> Memory-mapping the data files. In this way the operating system
1282
// takes care of the IO and caching. However, the limited address space
1283
// may preclude using it for large tables on 32-bit systems.
1284
// <li> Use buffered IO and let the kernel's file cache take care of caching.
1285
// It will access the data in chunks of the given buffer size, so the
1286
// entire tile does not need to be accessed if only a small part is
1287
// needed.
1288
// </ul>
1289
// Apart from reading, all access ways described above can also handle writing
1290
// and extending tables. They create fully equal files. Both little and big
1291
// endian data can be read or written.
1292
1293
// <ANCHOR NAME="Tables:virtual column engines">
1294
// <h3>Virtual Column Engines</h3></ANCHOR>
1295
//
1296
// Virtual column engines are used to implement the virtual (i.e.
1297
// calculated-on-the-fly) columns. The Table system provides
1298
// an abstract base class (or "interface class")
1299
// <linkto class="VirtualColumnEngine:description">VirtualColumnEngine</linkto>
1300
// that specifies the protocol for these engines.
1301
// The programmer must derive a concrete class to implement
1302
// the application-specific virtual column.
1303
// <p>
1304
// For example: the programmer
1305
// needs a column in a table which is the difference between two other
1306
// columns. (Perhaps these two other columns are updated periodically
1307
// during the execution of a program.) A good way to handle this would
1308
// be to have a virtual column in the table, and write a virtual column
1309
// engine which knows how to calculate the difference between corresponding
1310
// cells of the two other columns. So the result is that accessing a
1311
// particular cell of the virtual column invokes the virtual column engine,
1312
// which then gets the values from the other two columns, and returns their
1313
// difference. This particular example could be done using
1314
// <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>.
1315
// <p>
1316
// Several virtual column engines exist:
1317
// <ol>
1318
// <li> The class
1319
// <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>
1320
// makes it possible to define a column as an arbitrary expression of
1321
// other columns. It uses the <a href="../notes/199.html">TaQL</a>
1322
// CALC command. The virtual column can be a scalar or an array and
1323
// can have one of the standard data types supported by the Table System.
1324
// <li> The class
1325
// <linkto class="BitFlagsEngine:description">BitFlagsEngine</linkto>
1326
// maps an integer bit flags column to a Bool column. A read and write mask
1327
// can be defined telling which bits to take into account when mapping
1328
// to and from Bool (thus when reading or writing the Bool).
1329
// <li> The class
1330
// <linkto class="CompressFloat:description">CompressFloat</linkto>
1331
// compresses a single precision floating point array by scaling the
1332
// values to shorts (16-bit integer).
1333
// <li> The class
1334
// <linkto class="CompressComplex:description">CompressComplex</linkto>
1335
// compresses a single precision complex array by scaling the
1336
// values to shorts (16-bit integer). In fact, the 2 parts of the complex
1337
// number are combined to a 32-bit integer.
1338
// <li> The class
1339
// <linkto class="CompressComplexSD:description">CompressComplexSD</linkto>
1340
// does the same as CompressComplex, but optimizes for the case where the
1341
// imaginary part is zero (which is often the case for Single Dish data).
1342
// <li> The double templated class
1343
// <linkto class="ScaledArrayEngine:description">ScaledArrayEngine</linkto>
1344
// scales the data in an array from, for example,
1345
// float to short before putting it.
1346
// <li> The double templated class
1347
// <linkto class="MappedArrayEngine:description">MappedArrayEngine</linkto>
1348
// converts the data from one data type to another. Sometimes it might be
1349
// needed to store the residual data in an MS in double precision.
1350
// Because the imaging task can only handle single precision, this engine
1351
// can be used to map the data from double to single precision.
1352
// <li> The double templated class
1353
// <linkto class="RetypedArrayEngine:description">RetypedArrayEngine</linkto>
1354
// converts the data from one data type to another with the possibility
1355
// to reduce the number of dimensions. For example, it can be used to
1356
// store a 2-d array of StokesVector objects as a 3-d array of floats
1357
// by treating the 4 data elements as an extra array axis. If the
1358
// StokesVector class is simple, it can be done very efficiently.
1359
// <li> The class
1360
// <linkto class="ForwardColumnEngine:description">
1361
// ForwardColumnEngine</linkto>
1362
// forwards the gets and puts on a row in a column to the same row
1363
// in a column with the same name in another table. This provides
1364
// a virtual copy of the referenced column.
1365
// <li> The class
1366
// <linkto class="ForwardColumnIndexedRowEngine:description">
1367
// ForwardColumnIndexedRowEngine</linkto>
1368
// is similar to <src>ForwardColumnEngine</src>.
1369
// However, instead of forwarding it to the same row it uses a
1370
// column to map its row number to a row number in the referenced
1371
// table. In this way multiple rows can share the same data.
1372
// This data manager only allows for get operations.
1373
// <li> The calibration module has implemented a virtual column engine
1374
// to do on-the-fly calibration in a transparent way.
1375
// </ol>
1376
// To handle arbitrary data types the templated abstract base class
1377
// <linkto class="VSCEngine:description">VSCEngine</linkto>
1378
// has been written. An example of how to use this class can be
1379
// found in the demo program <src>dVSCEngine.cc</src>.
1380
1381
// <ANCHOR NAME="Tables:LockSync">
1382
// <h3>Table locking and synchronization</h3></ANCHOR>
1383
//
1384
// Multiple concurrent readers and writers (also via NFS) of a
1385
// table are supported by means of a locking/synchronization mechanism.
1386
// This mechanism is not very sophisticated in the sense that it is
1387
// very coarsely grained. When locking, the entire table gets locked.
1388
// A special lock file is used to lock the table. This lock file also
1389
// contains some synchronization data.
1390
// <p>
1391
// The following ways of locking are supported (see class
1392
// <linkto class=TableLock>TableLock</linkto>):
1393
// <dl>
1394
// <dt> TableLock::PermanentLocking(Wait)
1395
// <dd> locks the table permanently (from open till close). This means
1396
// that one writer OR multiple readers are possible.
1397
// <dt> TableLock::AutoLocking
1398
// <dd> does the locking automatically. This is the default mode.
1399
// This mode makes it possible that a table is shared amongst
1400
// processes without the user needing to write any special code.
1401
// It also means that a lock is only released when needed.
1402
// <dt> TableLock::AutoNoReadLocking
1403
// <dd> is similar to AutoLocking. However, no lock is acquired when
1404
// reading the table making it possible to read the table while
1405
// another process holds a write-lock. It also means that for read
1406
// purposes no automatic synchronization is done when the table is
1407
// updated in another process.
1408
// Explicit synchronization can be done by means of the function
1409
// <src>Table::resync</src>.
1410
// <dt> TableLock::UserLocking
1411
// <dd> requires that the programmer explicitly acquires and releases
1412
// a lock on the table. This makes some kind of transaction
1413
// processing possible. E.g. set a write lock, add a row,
1414
// write all data into the row and release the lock.
1415
// The Table functions <src>lock</src> and <src>unlock</src>
1416
// have to be used to acquire and release a (read or write) lock.
1417
// <dt> TableLock::UserNoReadLocking
1418
// <dd> is similar to UserLocking. However, similarly to AutoNoReadLocking
1419
// no lock is needed to read the table.
1420
// <dt> TableLock::NoLocking
1421
// <dd> does not use table locking. It is the responsibility of the
1422
// user to ensure that no concurrent access is done on the same
1423
// bucket or tile in a storage manager, otherwise a table might
1424
// get corrupted.
1425
// <br>This mode is always used if Casacore is built with
1426
// -DAIPS_TABLE_NOLOCKING.
1427
// </dl>
1428
// Synchronization of the processes accessing the same table is done
1429
// by means of the lock file. When a lock is released, the storage
1430
// managers flush their data into the table files. Some synchronization data
1431
// is written into the lock file telling the new number of table rows
1432
// and telling which storage managers have written data.
1433
// This information is read when another process acquires the lock
1434
// and is used to determine which storage managers have to refresh
1435
// their internal caches.
1436
// <br>Note that for the NoReadLocking modes (see above) explicit
1437
// synchronization might be needed using <src>Table::resync</src>.
1438
// <p>
1439
// The function <src>Table::hasDataChanged</src> can be used to check
1440
// if a table is (being) changed by another process. In this way
1441
// a program can react to it. E.g. the table browser can refresh its
1442
// screen when the underlying table is changed.
1443
// <p>
1444
// In general the default locking option will do.
1445
// From the above it should be clear that heavy concurrent access
1446
// results in a lot of flushing, thus will have a negative impact on
1447
// performance. If uninterrupted access to a table is needed,
1448
// the <src>PermanentLocking</src> option should be used.
1449
// If transaction-like processing is done (e.g. updating a table
1450
// containing an observation catalogue), the <src>UserLocking</src>
1451
// option is probably best.
1452
// <p>
1453
// Creation or deletion of a table is not possible if that table
1454
// is still open in another process. The function
1455
// <src>Table::isMultiUsed()</src> can be used to check if a table
1456
// is open in other processes.
1457
// <br>
1458
// The function <src>deleteTable</src> should be used to delete
1459
// a table. Before deleting the table it ensures that it is writable
1460
// and that it is not open in the current or another process.
1461
// <p>
1462
// The following example wants to read the table uninterrupted, thus it uses
1463
// the <src>PermanentLocking</src> option. It also wants to wait
1464
// until the lock is actually acquired.
1465
// Note that the destructor closes the table and releases the lock.
1466
// <srcblock>
1467
// // Open the table (readonly).
1468
// // Acquire a permanent (read) lock.
1469
// // It waits until the lock is acquired.
1470
// Table tab ("some.name",
1471
// TableLock(TableLock::PermanentLockingWait));
1472
// </srcblock>
1473
//
1474
// The following example uses the automatic locking.
1475
// It tells the system to check about every 20 seconds if another
1476
// process wants access to the table.
1477
// <srcblock>
1478
// // Open the table (readonly).
1479
// Table tab ("some.name",
1480
// TableLock(TableLock::AutoLocking, 20));
1481
// </srcblock>
1482
//
1483
// The following example gets data (say from a GUI) and writes it
1484
// as a row into the table. To lock the table as little as possible,
1485
// the lock is acquired just before writing and released immediately
1486
// thereafter.
1487
// <srcblock>
1488
// // Open the table (writable).
1489
// Table tab ("some.name",
1490
// TableLock(TableLock::UserLocking),
1491
// Table::Update);
1492
// while (True) {
1493
// get input data
1494
// tab.lock(); // Acquire a write lock and wait for it.
1495
// tab.addRow();
1496
// write data into the row
1497
// tab.unlock(); // Release the lock.
1498
// }
1499
// </srcblock>
1500
//
1501
// The following example deletes a table if it is not used in
1502
// another process.
1503
// <srcblock>
1504
// Table tab ("some.name");
1505
// if (! tab.isMultiUsed()) {
1506
// tab.markForDelete();
1507
// }
1508
// </srcblock>
1509
1510
// <ANCHOR NAME="Tables:KeyLookup">
1511
// <h3>Table lookup based on a key</h3></ANCHOR>
1512
//
1513
// Class <linkto class=ColumnsIndex>ColumnsIndex</linkto> offers the
1514
// user a means to find the rows matching a given key or key range.
1515
// It is a somewhat primitive replacement of a B-tree index and in the
1516
// future it may be replaced by a proper B+-tree implementation.
1517
// <p>
1518
// The <src>ColumnsIndex</src> class makes it possible to build an
1519
// in-core index on one or more columns. Looking up a key or key range
1520
// is done using a binary search on that index. It returns a vector
1521
// containing the row numbers of the rows matching the key (range).
1522
// <p>
1523
// The class is not capable of tracing changes in the underlying column(s).
1524
// It detects a change in the number of rows and updates the index
1525
// accordingly. However, it has to be told explicitly when a value
1526
// in the underlying column(s) changes.
1527
// <p>
1528
// The following example shows how the class can be used.
1529
// <example>
1530
// Suppose one has an antenna table with key ANTENNA.
1531
// <srcblock>
1532
// // Open the table and make an index for column ANTENNA.
1533
// Table tab("antenna.tab");
1534
// ColumnsIndex colInx(tab, "ANTENNA");
1535
// // Make a RecordFieldPtr for the ANTENNA field in the index key record.
1536
// // Its data type has to match the data type of the column.
1537
// RecordFieldPtr<Int> antFld(colInx.accessKey(), "ANTENNA");
1538
// // Now loop in some way and find the row for the antenna
1539
// // involved in that loop.
1540
// Bool found;
1541
// while (...) {
1542
// // Fill the key field and get the row number.
1543
// // ANTENNA is a unique key, so only one row number matches.
1544
// // Otherwise function getRowNumbers had to be used.
1545
// *antFld = antenna;
1546
// uInt antRownr = colInx.getRowNumber (found);
1547
// if (!found) {
1548
// cout << "Antenna " << antenna << " is unknown" << endl;
1549
// } else {
1550
// // antRownr can now be used to get data from that row in
1551
// // the antenna table.
1552
// }
1553
// }
1554
// </srcblock>
1555
// </example>
1556
// <linkto class=ColumnsIndex>ColumnsIndex</linkto> itself contains a more
1557
// advanced example. It shows how to use a private compare function
1558
// to adjust the lookup if the index does not contain single
1559
// key values, but intervals instead. This is useful if a row in
1560
// a (sub)table is valid for, say, a time range instead of a single
1561
// timestamp.
1562
1563
// <ANCHOR NAME="Tables:performance">
1564
// <h3>Performance and robustness considerations</h3></ANCHOR>
1565
//
1566
// The Table System resembles a database system, but it is not as robust.
1567
// It lacks the transaction and logging facilities common to data base systems.
1568
// It means that in case of a crash data might be lost.
1569
// To reduce the risk of data loss to
1570
// a minimum, it is advisable to regularly do a <tt>flush</tt>, optionally
1571
// with an <tt>fsync</tt> to ensure that all data are really written.
1572
// However, that can degrade the performance because it involves extra writes.
1573
// So one should find the right balance between robustness and performance.
1574
//
1575
// To get a good feeling for the performance issues, it is important to
1576
// understand some of the internals of the Table System.
1577
// <br>The storage managers drive the performance. All storage managers use
1578
// buckets (called tiles for the TiledStMan) which contain the data.
1579
// All IO is done by bucket. The bucket/tile size is defined when creating
1580
// the storage manager objects. Sometimes the default will do, but usually
1581
// it is better to set it explicitly.
1582
//
1583
// It is best to do a flush when a tile is full.
1584
// For example: <br>
1585
// When creating a MeasurementSet containing N antennae (thus N*(N-1)/2 baselines
1585
// or N*(N+1)/2 if auto-correlations are stored as well) it makes sense to
1587
// store, say, N/2 rows in a tile and do a flush each time all baselines
1588
// are written. In that way tiles are fully filled when doing the flush, so
1589
// no extra IO is involved.
1590
// <br>Here is some code showing this when creating a MeasurementSet.
1591
// The code should speak for itself.
1592
// <srcblock>
1593
// MS* createMS (const String& msName, int nchan, int nrant)
1594
// {
1595
// // Get the MS main default table description.
1596
// TableDesc td = MS::requiredTableDesc();
1597
// // Add the data column and its unit.
1598
// MS::addColumnToDesc(td, MS::DATA, 2);
1599
// td.rwColumnDesc(MS::columnName(MS::DATA)).rwKeywordSet().
1600
// define("UNIT","Jy");
1601
// // Store the DATA and FLAG column in two separate files.
1602
// // In this way accessing FLAG only is much cheaper than
1603
// // when combining DATA and FLAG.
1604
// // All data have the same shape, thus use TiledColumnStMan.
1605
// // Also store UVW with TiledColumnStMan.
1606
// Vector<String> tsmNames(1);
1607
// tsmNames[0] = MS::columnName(MS::DATA);
1608
// td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq));
1609
// td.defineHypercolumn("TiledData", 3, tsmNames);
1610
// tsmNames[0] = MS::columnName(MS::FLAG);
1611
// td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq));
1612
// td.defineHypercolumn("TiledFlag", 3, tsmNames);
1613
// tsmNames[0] = MS::columnName(MS::UVW);
1614
// td.defineHypercolumn("TiledUVW", 2, tsmNames);
1615
// // Setup the new table.
1616
// SetupNewTable newTab(msName, td, Table::New);
1617
// // Most columns vary slowly and use the IncrStMan.
1618
// IncrementalStMan incrStMan("ISMData");
1619
// // A few columns use the StandardStMan (set an appropriate bucket size).
1620
// StandardStMan stanStMan("SSMData", 32768);
1621
// // Store all pol and freq and some rows in a single tile.
1622
// // Autocorrelations are written, thus in total there are
1623
// // nrant*(nrant+1)/2 baselines. Ensure a baseline takes up an
1624
// // integer number of tiles.
1625
// TiledColumnStMan tiledData("TiledData",
1626
// IPosition(3,4,nchan,(nrant+1)/2));
1627
// TiledColumnStMan tiledFlag("TiledFlag",
1628
// IPosition(3,4,nchan,8*(nrant+1)/2));
1629
// TiledColumnStMan tiledUVW("TiledUVW",
1630
// IPosition(2,3,nrant*(nrant+1)/2));
1631
// newTab.bindAll (incrStMan);
1632
// newTab.bindColumn(MS::columnName(MS::ANTENNA1),stanStMan);
1633
// newTab.bindColumn(MS::columnName(MS::ANTENNA2),stanStMan);
1634
// newTab.bindColumn(MS::columnName(MS::DATA),tiledData);
1635
// newTab.bindColumn(MS::columnName(MS::FLAG),tiledFlag);
1636
// newTab.bindColumn(MS::columnName(MS::UVW),tiledUVW);
1637
// // Create the MS and its subtables.
1638
// // Get access to its columns.
1639
// MS* msp = new MeasurementSet(newTab);
1640
// // Create all subtables.
1641
// // Do this after the creation of optional subtables,
1642
// // so the MS will know about those optional subtables.
1643
// msp->createDefaultSubtables (Table::New);
1644
// return msp;
1645
// }
1646
// </srcblock>
1647
1648
// <h4>Some more performance considerations</h4>
1649
// Which storage managers to use and how to use them depends heavily on
1650
// the type of data and the access patterns to the data. Here follow some
1651
// guidelines:
1652
// <ol>
1653
// <li> Scalar data can be stored with the StandardStMan (SSM) or
1654
// IncrementalStMan (ISM). For slowly varying data (e.g. the TIME column
1655
// in a MeasurementSet) it is best to use the ISM. Otherwise the SSM.
1656
// Note that very long strings (longer than the bucketsize) can only
1657
// be stored with the SSM.
1658
// <li> Any number of storage managers can be used. In fact, each column
1659
// can have a storage manager of its own resulting in column-wise
1660
// stored data which is more and more used in data base systems.
1661
// In that way a query or sort on that column is very fast, because
1662
// the buckets to read only contain data of that column.
1663
// In practice one can decide to combine a few frequently used columns
1664
// in a storage manager.
1665
// <li> Array data can be stored with any column manager. Small fixed size
1666
// arrays can be stored directly with the SSM
1667
// (or ISM if not changing much).
1668
// However, they can also be stored with a TiledStMan (TSM) as shown
1669
// for the UVW column in the example above.
1670
// <br> Large arrays should usually be stored with a TSM. However,
1671
// if it must be possible to change the shape of an array after it
1672
// was stored, the SSM (or ISM) must be used. Note that in that
1673
// case a lot of disk space can be wasted, because the SSM and ISM
1674
// store the array data at the end of the file if the array got
1675
// bigger and do not reuse the old space. The only way to
1676
// reclaim it is by making a deep copy of the entire table.
1677
// <li> If an array is stored with a TSM, it is important to decide
1678
// which TSM to use.
1679
// <ol>
1680
// <li> The TiledColumnStMan is the most efficient, but only suitable
1681
// for arrays having the same shape in the entire column.
1682
// <li> The TiledShapeStMan is suitable for columns where the arrays
1683
// can have a few shapes.
1684
// <li> The TiledCellStMan is suitable for columns where the arrays
1685
// can have many different shapes.
1686
// </ol>
1687
// This is discussed in more detail
1688
// <a href="#Tables:TiledStMan">above</a>.
1689
// <li> If storing an array with a TSM, it can be very important to
1690
// choose the right tile shape. Not only does this define the size
1691
// of a tile, but it also defines if access in other directions
1692
// than the natural direction can be fast. It is also discussed in
1693
// more detail <a href="#Tables:TiledStMan">above</a>.
1694
// <li> Columns can be combined in a single TiledStMan. For instance, combining DATA
1695
// and FLAG is advantageous if FLAG is always used with DATA. However, if FLAG
1696
// is used on its own (e.g. in combination with CORRECTED_DATA), it is better
1697
// to separate them, otherwise tiles containing FLAG also contain DATA making the
1698
// tiles much bigger, thus more expensive to access.
1699
// </ol>
1700
//
1701
// <ANCHOR NAME="Tables:iotracing">
1702
// <h4>IO Tracing</h4></ANCHOR>
1703
//
1704
// Several forms of tracing can be done to see how the Table I/O performs.
1705
// <ul>
1706
// <li> On Linux/UNIX systems the <src>strace</src> command can be used to
1707
// collect trace information about the physical IO.
1708
// <li> The function <src>showCacheStatistics</src> in class
1709
// TiledStManAccessor can be used to show the number of actual reads
1710
// and writes and the percentage of cache hits.
1711
// <li> The software has some options to trace the operations done on
1712
// tables. It is possible to specify the columns and/or the operations
1713
// to be traced. The following <src>aipsrc</src> variables can be used.
1714
// <ul>
1715
// <li> <src>table.trace.filename</src> specifies the file to write the
1716
// trace output to. If not given or empty, no tracing will be done.
1717
// The file name can contain environment variables or a tilde.
1718
// <li> <src>table.trace.operation</src> specifies the operations to be
1719
// traced. It is a string containing s, r, and/or w where
1720
// s means tracing RefTable construction (selection/sort),
1721
// r means column reads, and w means column writes.
1722
// If empty, only the high level table operations (open, create, close)
1723
// will be traced.
1724
// <li> <src>table.trace.columntype</src> specifies the types of columns to
1725
// be traced. It is a string containing the characters s, a, and/or r.
1726
// s means all scalar columns, a all array columns, and r all record
1727
// columns. If empty and if <src>table.trace.column</src> is empty,
1728
// its default value is a.
1729
// <li> <src>table.trace.column</src> specifies names of columns to be
1730
// traced. Its value can be one or more glob-like patterns separated
1731
// by commas without any whitespace. The default is empty.
1732
// For example:
1733
// <srcblock>
1734
// table.trace.column: *DATA,FLAG,WEIGHT*
1735
// </srcblock>
1736
// to trace all DATA, the FLAG, and all WEIGHT columns.
1737
// </ul>
1738
// The trace output is a text file with the following columns
1739
// separated by a space.
1740
// <ul>
1741
// <li> The UTC time the trace line was written (with msec accuracy).
1742
// <li> The operation: n(ew), o(pen), c(lose), t(able), r(ead), w(rite),
1743
// s(election/sort/iter), p(rojection).
1744
// t means an arbitrary table operation as given in the name column.
1745
// <li> The table-id (as t=i) given at table creation (new) or open.
1746
// <li> The table name, column name, or table operation
1747
// (as <src>*oper*</src>).
1748
// <src>*reftable*</src> means that the operation is on a RefTable
1749
// (thus result of selection, sort, projection, or iteration).
1750
// <li> The row or rows to access (* means all rows).
1751
// Multiple rows are given as a series of ranges like s:e:i,s:e:i,...
1752
// where e and i are only given if applicable (default i is 1).
1753
// Note that e is inclusive and defaults to s.
1754
// <li> The optional array shape to access (none means scalar).
1755
// In case multiple rows are accessed, the last shape value is the
1756
// number of rows.
1757
// <li> The optional slice of the array in each row as [start][end][stride].
1758
// </ul>
1759
// Shape, start, end, and stride are given in Fortran-order as
1760
// [n1,n2,...].
1761
// </ul>
1762
1763
// <ANCHOR NAME="Tables:applications">
1764
// <h4>Applications to inspect/manipulate a table</h4></ANCHOR>
1765
// <ul>
1766
// <li><em>showtableinfo</em> shows the structure of a table. It can show:
1767
// <ul>
1768
// <li> the columns and their format (optionally sorted on name)
1769
// <li> the data managers used to store the column data
1770
// <li> the table and/or column keywords and their values
1771
// <li> recursively the same info of the subtables
1772
// </ul>
1773
// <li><em>showtablelock</em> shows if a table is locked or opened and by
1774
// which process.
1775
// <li><em>lsmf</em> shows the virtual files contained in a MultiFile.
1776
// <li><em>tomf</em> copies the given files to a MultiFile.
1777
// <li><em>taql</em> can be used to query a table using the
1778
// <a href="../notes/199.html">Table Query Language</a> (TaQL).
1779
// </ul>
1780
//
1781
// </synopsis>
1782
// </module>
1783
1784
1785
1786
}
//# NAMESPACE CASACORE - END
1787
1788
#endif
casacore
this file contains all the compiler specific defines
Definition:
mainpage.dox:28
Generated by
1.8.17