You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by GitBox <gi...@apache.org> on 2022/07/11 17:12:11 UTC

[GitHub] [airflow] bbovenzi commented on a diff in pull request #24969: WIP - Move dataset dagrun creation to scheduler main

bbovenzi commented on code in PR #24969:
URL: https://github.com/apache/airflow/pull/24969#discussion_r918171500


##########
airflow/migrations/versions/0114_2_4_0_add_dataset_model.py:
##########
@@ -118,17 +118,46 @@ def _create_dataset_dag_run_queue_table():
     )
 
 
+def _create_dataset_event_table():
+    op.create_table(
+        'dataset_event',
+        sa.Column('id', Integer, primary_key=True, autoincrement=True),
+        sa.Column('dataset_id', Integer, nullable=False),
+        sa.Column('extra', ExtendedJSON, nullable=True),
+        sa.Column('task_id', String(250), nullable=True),
+        sa.Column('dag_id', String(250), nullable=True),
+        sa.Column('run_id', String(250), nullable=True),
+        sa.Column('map_index', sa.Integer(), nullable=True, server_default='-1'),
+        sa.Column('created_at', TIMESTAMP, nullable=False),
+        sqlite_autoincrement=True,  # ensures PK values not reused
+    )
+    op.create_index('idx_dataset_id_created_at', 'dataset_event', ['dataset_id', 'created_at'])
+
+
+def _create_dataset_event_dag_run_table():
+    op.create_table(
+        'dataset_event_dag_run',
+        sa.Column('dataset_event_id', Integer, nullable=False),
+        sa.Column('dag_run_id', Integer, nullable=False),

Review Comment:
   I feel like we also want a `dag_id` column here in the `dataset_event_dag_run` table, no?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@airflow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org