Refactor global options to local options.

Global options moved to main. All functions now take whatever options they need as parameters.
rhiever · Mar 3, 2013 · aa0b936 · bboe · Mar 3, 2013 · rhiever
1 parent 0677976
commit aa0b936
Showing 1 changed file with 83 additions and 33 deletions.
diff --git a/word_freqs.py b/word_freqs.py
@@ -44,12 +44,12 @@
 # put words here that you don't want to include in the word cloud
 excludedWords = ["/", "--", "...", "deleted", ")x"]
 
-# Global Variable Initialization
-options = None
-
 
 def parse_cmd_line():
-    # command-line argument parsing
+    """
+        command-line argument parsing
+    """
+
     usage = ("usage: %prog [options] USERNAME TARGET\n\n"
              "USERNAME sets your Reddit username for the bot\n"
              "TARGET sets the subreddit or user to count word frequencies for."
@@ -89,7 +89,6 @@ def parse_cmd_line():
                             "selftext, comment body) rather than incrementing"
                             "the total for for each instance."))
 
-    global options
     options, args = parser.parse_args()
 
     if len(args) != 2:
@@ -106,11 +105,21 @@ def parse_cmd_line():
     if options.period not in ["day", "week", "month", "year", "all"]:
         parser.error("Invalid period.")
 
-    return user, target
+    return user, target, options, args
 
 
-def parseText(text):
-    """Parse the passed in text and add words that are not common."""
+def parseText(text, count_word_freqs, max_threshold):
+    """
+        Parse the passed in text and add words that are not common.
+        
+        :arg `count_word_freqs`: only count a word once per text block (title,
+                                 selftext, comment body) rather than incrementing
+                                 the total for for each instance.
+        
+        :arg `max_threshold`: maximum relative frequency in the text a word can
+                              appear to be considered in word counts.
+                              prevents word spamming in a single submission.
+    """
     total = 0.0  # intentionally a float
     text_words = defaultdict(int)
     for word in text.split():  # Split on all whitespace
@@ -121,50 +130,85 @@ def parseText(text):
 
     # Add to popularWords list
     for word, count in text_words.items():
-        if count / total <= options.max_threshold:
-            if options.count_word_freqs:
+        if count / total <= max_threshold:
+            if count_word_freqs:
                 popularWords[word] += count
             else:
                 popularWords[word] += 1
 
 
-def processRedditor(redditor):
-    """Parse submissions and comments for the given Redditor."""
-    for entry in with_status(redditor.get_overview(limit=options.limit)):
+def processRedditor(redditor, limit, count_word_freqs, max_threshold):
+    """
+        Parse submissions and comments for the given Redditor.
+        
+        :arg `limit`: the maximum number of submissions to scrape from the subreddit
+        
+        :arg `count_word_freqs`: only count a word once per text block (title,
+                                 selftext, comment body) rather than incrementing
+                                 the total for for each instance.
+        
+        :arg `max_threshold`: maximum relative frequency in the text a word can
+                              appear to be considered in word counts.
+                              prevents word spamming in a single submission.
+        
+    """
+    for entry in with_status(redditor.get_overview(limit=limit)):
         if isinstance(entry, praw.objects.Comment):  # Parse comment
-            parseText(entry.body)
+            parseText(entry.body, count_word_freqs, max_threshold)
         else:  # Parse submission
-            processSubmission(entry, include_comments=False)
+            processSubmission(entry, count_word_freqs, max_threshold, include_comments=False)
 
 
-def processSubmission(submission, include_comments=True):
-    """Parse a submission's text and body (if applicable).
-
-    Include the submission's comments when `include_comments` is True.
+def processSubmission(submission, count_word_freqs, max_threshold, include_comments=True):
+    """
+        Parse a submission's text and body (if applicable).
+
+        :arg `count_word_freqs`: only count a word once per text block (title,
+                                 selftext, comment body) rather than incrementing
+                                 the total for for each instance.
+        
+        :arg `max_threshold`: maximum relative frequency in the text a word can
+                              appear to be considered in word counts.
+                              prevents word spamming in a single submission.
+        
+        :arg `include_comments`: include the submission's comments when True
 
     """
     if include_comments:  # parse all the comments for the submission
         submission.replace_more_comments()
         for comment in praw.helpers.flatten_tree(submission.comments):
-            parseText(comment.body)
+            parseText(comment.body, count_word_freqs, max_threshold)
 
     # parse the title of the submission
-    parseText(submission.title)
+    parseText(submission.title, count_word_freqs, max_threshold)
 
     # parse the selftext of the submission (if applicable)
     if submission.is_self:
-        parseText(submission.selftext)
+        parseText(submission.selftext, count_word_freqs, max_threshold)
 
 
-def processSubreddit(subreddit):
-    """Parse comments, title text, and selftext in a given subreddit."""
+def processSubreddit(subreddit, period, limit, count_word_freqs, max_threshold):
+    """
+        Parse comments, title text, and selftext in a given subreddit.
+        
+        :arg `period`: the time period to scrape the subreddit over (day, week, month, etc.)
+        
+        :arg `limit`: the maximum number of submissions to scrape from the subreddit
+        
+        :arg `count_word_freqs`: only count a word once per text block (title,
+                                 selftext, comment body) rather than incrementing
+                                 the total for for each instance.
+        
+        :arg `max_threshold`: maximum relative frequency in the text a word can
+                              appear to be considered in word counts.
+                              prevents word spamming in a single submission.
+    """
 
     # determine period to count the words over
-    params = {'t': options.period}
-    for submission in with_status(subreddit.get_top(limit=options.limit,
-                                                    params=params)):
+    params = {'t': period}
+    for submission in with_status(subreddit.get_top(limit=limit, params=params)):
         try:
-            processSubmission(submission)
+            processSubmission(submission, count_word_freqs, max_threshold)
         except HTTPError as exc:
             sys.stderr.write("\nSkipping submission {0} due to HTTP status {1}"
                              " error. Continuing...\n"
@@ -173,7 +217,9 @@ def processSubreddit(subreddit):
 
 
 def with_status(iterable):
-    """Wrap an iterable outputing '.' for each item (up to 50 a line)."""
+    """
+        Wrap an iterable outputting '.' for each item (up to 100 per line).
+    """
     for i, item in enumerate(iterable):
         sys.stderr.write('.')
         sys.stderr.flush()
@@ -183,7 +229,9 @@ def with_status(iterable):
 
 
 def main():
-    user, target = parse_cmd_line()
+
+    # parse the command-line options and arguments
+    user, target, options, args = parse_cmd_line()
 
     # open connection to Reddit
     r = praw.Reddit(user_agent="bot by /u/{0}".format(user))
@@ -196,12 +244,14 @@ def main():
     target = target[3:]
 
     if options.is_subreddit:
-        processSubreddit(r.get_subreddit(target))
+        processSubreddit(r.get_subreddit(target), options.period, options.limit,
+                         options.count_word_freqs, options.max_threshold)
     else:
-        processRedditor(r.get_redditor(target))
+        processRedditor(r.get_redditor(target), options.limit,
+                        options.count_word_freqs, options.max_threshold)
 
     # build a string containing all the words for the word cloud software
-    output = ""
+    output = "\n"
 
     # open output file to store the output string
     outFileName = target + ".csv"