Details
Type: Improvement
Status: Done
Resolution: Done
Fix Version/s: None
Component/s: daf_butler
Labels:
Story Points: 2
Epic Link:
Sprint: DB_S22_12
Team: Data Access and Database
Description
A community post reports that if someone runs a multi-threaded, multi-node ingest against a repository into which no data have ever been ingested, the table-creation code fails:
lsst.ingest INFO: Successfully extracted metadata from 33 files with 0 failures
lsst.daf.butler.cli.utils ERROR: Caught an exception, details are in traceback:
Traceback (most recent call last):
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/env/lsst-w.2022.06/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1802, in _execute_context
    self.dialect.do_execute(
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/env/lsst-w.2022.06/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line 732, in do_execute
    cursor.execute(statement, parameters)
psycopg2.errors.UniqueViolation: duplicate key value violates unique constraint "pg_type_typname_nsp_index"
DETAIL: Key (typname, typnamespace)=(dataset_tags_00000004, 2200) already exists.

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/stack/miniconda3-py38_4.9.2-1.0.0/Linux64/obs_base/gcaa7f91c06+32dde6e53f/python/lsst/obs/base/cli/cmd/commands.py", line 138, in ingest_raws
    script.ingestRaws(*args, **kwargs)
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/stack/miniconda3-py38_4.9.2-1.0.0/Linux64/obs_base/gcaa7f91c06+32dde6e53f/python/lsst/obs/base/script/ingestRaws.py", line 85, in ingestRaws
    ingester.run(
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/stack/miniconda3-py38_4.9.2-1.0.0/Linux64/utils/g63a1f4f1ec+494d45cfd3/python/lsst/utils/timer.py", line 339, in timeMethod_wrapper
    res = func(self, *args, **keyArgs)
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/stack/miniconda3-py38_4.9.2-1.0.0/Linux64/obs_base/gcaa7f91c06+32dde6e53f/python/lsst/obs/base/ingest.py", line 1183, in run
    new_refs, bad, n_exp, n_exp_fail, n_ingest_fail = self.ingestFiles(
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/stack/miniconda3-py38_4.9.2-1.0.0/Linux64/obs_base/gcaa7f91c06+32dde6e53f/python/lsst/obs/base/ingest.py", line 1013, in ingestFiles
    self.butler.registry.registerDatasetType(self.datasetType)
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/stack/miniconda3-py38_4.9.2-1.0.0/Linux64/daf_butler/ge8d8315da9+47328fee21/python/lsst/daf/butler/registries/sql.py", line 381, in registerDatasetType
    _, inserted = self._managers.datasets.register(datasetType)
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/stack/miniconda3-py38_4.9.2-1.0.0/Linux64/daf_butler/ge8d8315da9+47328fee21/python/lsst/daf/butler/registry/datasets/byDimensions/_manager.py", line 282, in register
    tags = self._db.ensureTableExists(
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/stack/miniconda3-py38_4.9.2-1.0.0/Linux64/daf_butler/ge8d8315da9+47328fee21/python/lsst/daf/butler/registry/interfaces/_database.py", line 1020, in ensureTableExists
    table.create(connection)
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/env/lsst-w.2022.06/lib/python3.8/site-packages/sqlalchemy/sql/schema.py", line 950, in create
    bind._run_ddl_visitor(ddl.SchemaGenerator, self, checkfirst=checkfirst)
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/env/lsst-w.2022.06/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 2113, in _run_ddl_visitor
    visitorcallable(self.dialect, self, **kwargs).traverse_single(element)
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/env/lsst-w.2022.06/lib/python3.8/site-packages/sqlalchemy/sql/visitors.py", line 524, in traverse_single
    return meth(obj, **kw)
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/env/lsst-w.2022.06/lib/python3.8/site-packages/sqlalchemy/sql/ddl.py", line 893, in visit_table
    self.connection.execute(
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/env/lsst-w.2022.06/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1289, in execute
    return meth(self, multiparams, params, _EMPTY_EXECUTION_OPTS)
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/env/lsst-w.2022.06/lib/python3.8/site-packages/sqlalchemy/sql/ddl.py", line 80, in _execute_on_connection
    return connection._execute_ddl(
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/env/lsst-w.2022.06/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1381, in _execute_ddl
    ret = self._execute_context(
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/env/lsst-w.2022.06/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1845, in _execute_context
    self._handle_dbapi_exception(
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/env/lsst-w.2022.06/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 2026, in _handle_dbapi_exception
    util.raise_(
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/env/lsst-w.2022.06/lib/python3.8/site-packages/sqlalchemy/util/compat.py", line 207, in raise_
    raise exception
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/env/lsst-w.2022.06/lib/python3.8/site-packages/sqlalchemy/engine/base.py", line 1802, in _execute_context
    self.dialect.do_execute(
  File "/gscratch/astro/stevengs/lsst_stacks/stacks/w.2022.06/env/lsst-w.2022.06/lib/python3.8/site-packages/sqlalchemy/engine/default.py", line 732, in do_execute
    cursor.execute(statement, parameters)
sqlalchemy.exc.IntegrityError: (psycopg2.errors.UniqueViolation) duplicate key value violates unique constraint "pg_type_typname_nsp_index"
DETAIL: Key (typname, typnamespace)=(dataset_tags_00000004, 2200) already exists.

[SQL:
CREATE TABLE public.dataset_tags_00000004 (
    dataset_type_id BIGINT NOT NULL,
    dataset_id UUID,
    collection_id BIGINT,
    instrument VARCHAR(16) NOT NULL,
    detector BIGINT NOT NULL,
    exposure BIGINT NOT NULL,
    PRIMARY KEY (dataset_id, collection_id),
    CONSTRAINT dataset_tags_00000004_unq_dataset_type_id_collection_i_0ba9af29 UNIQUE (dataset_type_id, collection_id, instrument, detector, exposure),
    CONSTRAINT fkey_dataset_tags_00000004_dataset_type_id_dataset_type_id FOREIGN KEY(dataset_type_id) REFERENCES public.dataset_type (id),
    CONSTRAINT fkey_dataset_tags_00000004_dataset_id_dataset_id FOREIGN KEY(dataset_id) REFERENCES public.dataset (id) ON DELETE CASCADE,
    CONSTRAINT fkey_dataset_tags_00000004_collection_collection_id_co_a5ba3b28 FOREIGN KEY(collection_id) REFERENCES public.collection (collection_id) ON DELETE CASCADE,
    CONSTRAINT fkey_dataset_tags_00000004_collection_summary_dataset__b377058f FOREIGN KEY(collection_id, dataset_type_id) REFERENCES public.collection_summary_dataset_type (collection_id, dataset_type_id),
    CONSTRAINT fkey_dataset_tags_00000004_instrument_name_instrument FOREIGN KEY(instrument) REFERENCES public.instrument (name),
    CONSTRAINT fkey_dataset_tags_00000004_collection_summary_instrume_5947d29e FOREIGN KEY(collection_id, instrument) REFERENCES public.collection_summary_instrument (collection_id, instrument),
    CONSTRAINT fkey_dataset_tags_00000004_detector_instrument_id_inst_1d3d0e23 FOREIGN KEY(instrument, detector) REFERENCES public.detector (instrument, id),
    CONSTRAINT fkey_dataset_tags_00000004_exposure_instrument_id_inst_0237f157 FOREIGN KEY(instrument, exposure) REFERENCES public.exposure (instrument, id)
)
]
(Background on this error at: https://sqlalche.me/e/14/gkpj)
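The failure is a check-then-create race: ensureTableExists sees that dataset_tags_00000004 is missing, another worker creates it first, and the loser's CREATE TABLE trips the unique index on pg_type (every new table also registers a row type there, which is why the error names pg_type_typname_nsp_index rather than the table itself). For reference only, here is a minimal sketch of making such a call tolerant of a concurrent creator; the helper name and the use of SQLAlchemy's checkfirst flag are illustrative assumptions, not the actual daf_butler implementation:

# Hypothetical sketch, not daf_butler code: tolerate a concurrent CREATE TABLE
# on PostgreSQL by catching the loser's IntegrityError and re-checking.
import sqlalchemy
from sqlalchemy import exc


def ensure_table_exists_concurrently(engine: sqlalchemy.engine.Engine,
                                     table: sqlalchemy.Table) -> None:
    try:
        # checkfirst issues a non-atomic "does it exist?" query before the
        # CREATE TABLE, so two processes can both decide to create the table.
        table.create(engine, checkfirst=True)
    except exc.IntegrityError:
        # Another process won the race; confirm the table really exists now,
        # otherwise re-raise because the failure was something else.
        if not sqlalchemy.inspect(engine).has_table(table.name, schema=table.schema):
            raise

The re-check after catching IntegrityError distinguishes a genuine constraint problem from simply having lost the race.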
Comments
That would need a schema change. Another option would be to lock the entire dataset_type table and check whether the tags/calib tables are already there. This is potentially expensive, but if we don't try to register new dataset types very frequently, it may be acceptable. A rough sketch of that option follows.
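The sketch below assumes direct SQLAlchemy access to a PostgreSQL registry; the function name and the tags-table argument are placeholders rather than the real daf_butler API:

# Hypothetical sketch of "lock dataset_type, then check": serialize concurrent
# dataset-type registrations on a table lock so the existence check cannot race.
import sqlalchemy


def register_dataset_type_tables(engine: sqlalchemy.engine.Engine,
                                 tags_table: sqlalchemy.Table) -> None:
    with engine.begin() as connection:
        # EXCLUSIVE mode conflicts with itself, so only one registering
        # transaction at a time gets past this point; plain reads of
        # dataset_type are still allowed.
        connection.execute(
            sqlalchemy.text("LOCK TABLE dataset_type IN EXCLUSIVE MODE")
        )
        if not sqlalchemy.inspect(connection).has_table(
            tags_table.name, schema=tags_table.schema
        ):
            tags_table.create(connection)

Every registration attempt pays for the lock until its transaction commits, even when the tables already exist, which is the "potentially expensive" part; ordinary reads of dataset_type are unaffected because EXCLUSIVE mode still admits ACCESS SHARE locks.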