Currently, I have a bunch of luigi tasks queued together, with a simple dependency chain (a -> b -> c -> d). d gets executed first, and …
I use this to forcibly regenerate output without needing to remove it first, and to select which task types to regenerate. In our use case, we want the old generated files to continue to exist until they are rewritten with fresh versions.
# generation.py
class ForcibleTask(luigi.Task):
    """Task base class whose existing output can be forcibly regenerated.

    A task whose family name is listed in ``force_task_families`` is not
    considered complete just because its output exists; it only becomes
    complete once the output file's inode differs from the one observed on
    the first ``complete()`` check of this instance. The old file therefore
    stays on disk until a new one takes its place.

    NOTE(review): this relies on regeneration replacing the file with a new
    one (new inode) rather than rewriting it in place -- confirm against how
    the output is actually written.
    """

    # Names of task families that must be regenerated even when their
    # output already exists. Declared non-significant and non-positional.
    force_task_families = luigi.ListParameter(
        positional=False, significant=False, default=[]
    )

    def complete(self) -> bool:
        """Report whether this task's output is up to date.

        Returns:
            ``False`` when the output is missing. For a forced task family,
            ``True`` only once the output's inode differs from the inode
            remembered on the first check. Otherwise, whether the output
            exists.
        """
        target = self.output()
        family = self.get_task_family()
        print("{}: check {}".format(family, target.path))

        if not target.exists():
            self.oldinode = 0  # so any new file is considered complete
            return False

        try:
            curino = pathlib.Path(target.path).stat().st_ino
        except FileNotFoundError:
            # The output vanished between the exists() check and stat();
            # handle it exactly like an output that was missing all along.
            self.oldinode = 0
            return False

        # Remember the inode seen on the first check so that later checks
        # can tell whether the file has been replaced since then.
        if not hasattr(self, "oldinode"):
            self.oldinode = curino

        if family in self.force_task_families:
            # only done when file has been overwritten with new file
            return self.oldinode != curino
        return target.exists()
class Generate(ForcibleTask):
    """Forcible task that targets one local file per date."""

    # Date used to build the output file name.
    date = luigi.DateParameter()

    def output(self):
        """Return the local target named ``generated-YYYY-MM-DD`` for ``self.date``."""
        filename = self.date.strftime("generated-%Y-%m-%d")
        return luigi.LocalTarget(filename)
luigi --module generation Generate '--Generate-force-task-families=["Generate"]'