# Origin: wordcount/src/bolts/wordcount.py, added in commit 5367edb
# ("Work in progress", Cody Hiar, 2021-10-10). The same commit also adds an
# empty wordcount/src/bolts/__init__.py.
import os
from collections import Counter

from streamparse import Bolt


class WordCountBolt(Bolt):
    """Keep a running tally per word and emit ``[word, count]`` for each tuple.

    State lives on the instance: ``counter`` (per-word tallies), ``total``
    (sum of all increments applied so far) and ``pid`` (``os.getpid()`` at
    initialization time, used only in log messages).
    """

    outputs = ["word", "count"]

    # A progress line is logged each time ``total`` crosses a multiple of this.
    LOG_INTERVAL = 1000

    # Per-word increment overrides; any word not listed counts as 1.
    # NOTE(review): the "dog" weighting comes from a work-in-progress commit
    # and looks like demo behaviour -- confirm it is still wanted.
    WORD_WEIGHTS = {"dog": 10}

    def initialize(self, conf, ctx):
        """Reset the counting state.

        ``conf`` and ``ctx`` are accepted but unused here (presumably the
        configuration/context objects streamparse passes in -- confirm against
        the streamparse documentation).
        """
        self.counter = Counter()
        self.pid = os.getpid()
        self.total = 0

    def _increment(self, word, inc_by):
        """Add ``inc_by`` to both the tally for ``word`` and the running total."""
        self.counter[word] += inc_by
        self.total += inc_by

    def process(self, tup):
        """Count the word in ``tup.values[0]`` and emit its updated tally.

        A progress message is logged whenever the running total crosses a
        multiple of ``LOG_INTERVAL``.
        """
        word = tup.values[0]
        before = self.total
        self._increment(word, self.WORD_WEIGHTS.get(word, 1))
        # Compare interval buckets rather than testing ``total % N == 0``:
        # increments larger than 1 can step straight over an exact multiple,
        # which would silently skip the progress message.
        if self.total // self.LOG_INTERVAL != before // self.LOG_INTERVAL:
            self.logger.info(
                f"counted [{self.total:,}] words [pid={self.pid}]"
            )
        self.emit([word, self.counter[word]])