refactor

HalCanary · HalCanary · commit 14678db48e42 · 2023-11-07T21:44:21.000-05:00
diff --git a/Makefile b/Makefile
@@ -1,5 +1,5 @@
 all:
-	./test_bioscript.py
 	./test_pycodestyle.py
+	./test_bioscript.py
 	./test_ranked_match.py
 .PHONY: all
diff --git a/README.md b/README.md
@@ -2,14 +2,17 @@
 
 Biology Scripts
 
-*Copyright 2023 Hal W Canary III, Lindsay R Saunders PhD.*  
+*Copyright 2023 Hal W Canary III, Lindsay R Saunders PhD.*
 *Use of this program is governed by contents of the LICENSE file.*
 
 * * *
 
 ## Install
 
-1.  Install Python (version 3.) <https://www.python.org/downloads/>
+0.  Open a terminal.  For macOS, get the instructions here:
+    <https://google.com/search?q=OPEN+MACOS+TERMINAL>
+
+1.  Install Python (version ≥ 3.) <https://www.python.org/downloads/>
 
     Verify that the command
 
@@ -19,7 +22,10 @@ Biology Scripts
 
     works.
 
-2.  Clone this repo:
+2.  Install `git`.  Check if it is installed with the command `git --version` .
+    "If you don’t have it installed already, it will prompt you to install it."
+
+3.  Clone this repo with `git`:
 
     ```
     cd ~
@@ -33,6 +39,9 @@ Biology Scripts
     git clone git@github.com:HalCanary/bioscript.git
     ```
 
+    This will install into the directory `~/bioscript/` .
+
+
 * * *
 
 ## Unit Testing
@@ -91,3 +100,70 @@ What it does:
     4.  Use the one with the longer sequence.
 
 * * *
+
+## Running `ranked_match.py`
+
+To run the ranked match program, first install `python3` and `~/bioscript` as
+described above.  Then, from a Terminal, navigate to the directory where
+your rankings CSV file is located.
+
+```
+$ cd ~/Desktop
+$
+```
+
+(_Here, the string "`$`" represents your entire prompt.  You don't type it;
+type only the text after the `$`._)
+
+To see what files are located here, use the `ls` command.  For example:
+
+```
+$ ls
+Bio212_FA20_Topic_Rankings.csv	Bio212_FA20_Topic_Rankings_OUTPUT.txt
+$
+```
+
+To produce rankings:
+
+```
+$ ~/bioscript/ranked_match.py Bio212_FA20_Topic_Rankings.csv
+Francesca Russo    topic=36 (ranked=1)
+Jamie Costa        topic=68 (ranked=1)
+Robin Ortega       topic=37 (ranked=1)
+Kobe Davis         topic=77 (ranked=1)
+Mia Beard          topic=7 (ranked=1)
+Nathanael Rangel   topic=76 (ranked=2)
+Gloria Conley      topic=55 (ranked=1)
+Marvin Richmond    topic=13 (ranked=1)
+Whitney Wang       topic=6 (ranked=1)
+Cohen Aguilar      topic=19 (ranked=1)
+Josie Rodriguez    topic=72 (ranked=1)
+Henry Zimmerman    topic=34 (ranked=1)
+Ariyah Valdez      topic=10 (ranked=3)
+Kyler McDaniel     topic=35 (ranked=2)
+Dahlia Taylor      topic=20 (ranked=3)
+Jackson Avalos     topic=38 (ranked=1)
+Paloma Williams    topic=52 (ranked=2)
+Oliver Moon        topic=57 (ranked=1)
+Naya Riley         topic=63 (ranked=3)
+Amari Fischer      topic=53 (ranked=2)
+Maci Fuentes       topic=54 (ranked=5)
+Bowen Potter       topic=24 (ranked=2)
+Rory Pollard       topic=50 (ranked=3)
+Jad Warren         topic=65 (ranked=3)
+Sloane Pham        topic=27 (ranked=1)
+Russell Tapia      topic=69 (ranked=2)
+Michaela Rodriguez topic=43 (ranked=1)
+Henry Archer       topic=49 (ranked=1)
+Kadence Lyons      topic=71 (ranked=1)
+Cyrus Ball         topic=None (ranked=2)
+Abby Decker        topic=None (ranked=2)
+```
+
+Alternatively, to save the output to a text file:
+
+```
+$ ~/bioscript/ranked_match.py Bio212_FA20_Topic_Rankings.csv > Bio212_FA20_Topic_Rankings_OUTPUT.txt
+```
+
+* * *
diff --git a/ranked_match.py b/ranked_match.py
@@ -28,110 +28,95 @@
    27,   40,   17,   4
    1,    1,    3,    34
    3,    19,   7,    4
-
-To Run:
-
-    Lindsays-MBP:~ Lindsay$ .../ranked_match.py  PATH_TO_CSV_FILE
 '''
 
 
+import argparse
+import collections
 import csv
 import sys
 import os
 
 
-class Student:
-    def __init__(self, name, prefs, rank):
-        self.name = name
-        self.prefs = [p for p in prefs if p]
-        self.rank = rank
-        self.topic = ''
-        self.choice = 0
-
-    def __str__(self):
-        return '%s %r' % (self.name, self.prefs)
+Result = collections.namedtuple('Result', ['name', 'topic', 'choice'])
 
 
 def parseInteger(s):
-    s = s.replace('\xCA', '').strip()
+    s = s.strip('\uFEFF').strip().strip('\xCA')
     try:
         return int(s)
     except ValueError:
-        if len(s):
-            raise
-        return None
+        return s if len(s) else None
 
 
 def parseCSVFile(infile):
     reader = csv.reader(infile)
-    names = [n.strip().strip('\uFEFF') for n in next(reader)]
-    num_names = len(names)
-    inverse_prefs = []
+    data = []
     for line in reader:
-        assert len(line) <= num_names
         line = [parseInteger(x) for x in line]
-        if not any(line):
-            continue
-        if len(line) < num_names:
-            line.extend([None for x in range(len(line), num_names)])
-        inverse_prefs.append(line)
-    return (names, inverse_prefs)
-
-
-def rangedMatch(names, inverse_prefs):
-    all_prefs = [list(x) for x in zip(*inverse_prefs)]
-
-    number_topics = max(topic for prefs in all_prefs for topic in prefs if topic is not None)
-
-    assert number_topics >= len(all_prefs[0])
-
-    assert all(topic is None or topic > 0 for prefs in all_prefs for topic in prefs)
-
-    for prefs in all_prefs:
-        prefs.extend([None for x in range(len(prefs), number_topics)])
-
-    students = [Student(name, prefs, rank)
-                for rank, (name, prefs) in enumerate(zip(names, all_prefs))]
-
-    assignments = dict((i, None) for i in range(1, number_topics + 1))
-
-    while any(student.topic == '' for student in students):
-        for student in students:
-            if student.topic != '':
+        if line:
+            data.append(line)
+    return data
+
+
+def getPrefs(csv_data):
+    allPrefs = [[] for n in csv_data[0]]
+    for idx, prefs in enumerate(allPrefs):
+        for row in csv_data[1:]:
+            if idx < len(row):
+                topic = row[idx]
+                if topic is not None:
+                    prefs.append(topic)
+    return csv_data[0], allPrefs
+
+
+def rankedMatch(names, allPrefs):
+    assignments = dict()
+    N = len(names)
+    topics = ['' for _ in range(N)]
+    choices = [0 for _ in range(N)]
+    while any(topic == '' for topic in topics):
+        for idx in range(N):
+            if topics[idx] != '':
                 continue
-            if not student.prefs:
-                student.topic = None
+            prefs = allPrefs[idx]
+            if not prefs:
+                topics[idx] = None
                 continue
-            topic = student.prefs.pop(0)
-            student.choice += 1
-            current_student = assignments[topic]
+            topic = prefs.pop(0)
+            choices[idx] += 1
+            current_student = assignments.get(topic)
             if current_student is None:
-                assignments[topic] = student
-                student.topic = topic
-            elif current_student.rank > student.rank:
+                assignments[topic] = idx
+                topics[idx] = topic
+            elif current_student > idx:
                 # less rank is better
-                assignments[topic] = student
-                student.topic = topic
-                current_student.topic = ''
-
-    return students
+                assignments[topic] = idx
+                topics[idx] = topic
+                topics[current_student] = ''
+    return [Result(n, t, c) for n, t, c in zip(names, topics, choices)]
 
 
-def main():
-    if len(sys.argv) < 2:
-        sys.stdout.write("Usage:\n\t%s CSV_FILE_PATH\n" % (
-            os.path.basename(sys.argv[0])))
-        sys.stdout.write(__doc__ + '\n')
-        exit(1)
-
-    with open(sys.argv[1], 'r') as infile:
-        names, inverse_prefs = parseCSVFile(infile)
-    students = rangedMatch(names, inverse_prefs)
+def print_students(output, students):
     name_length = max(len(s.name) for s in students)
     for student in students:
-        sys.stdout.write('%-*s topic=%r (ranked=%d)\n' % (
+        output.write('%-*s topic=%r (ranked=%d)\n' % (
               name_length, student.name, student.topic, student.choice))
 
 
+def main():
+    argparser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=__doc__)
+    argparser.add_argument(
+        'CSV_FILE',
+        type=argparse.FileType('r'),
+        help='Path of CSV file to read.')
+    args = argparser.parse_args(sys.argv[1:])
+    data = parseCSVFile(args.CSV_FILE)
+    args.CSV_FILE.close()
+    print_students(sys.stdout, rankedMatch(*getPrefs(data)))
+
+
 if __name__ == '__main__':
     main()
diff --git a/test_pycodestyle.py b/test_pycodestyle.py
@@ -20,11 +20,23 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+import logging
 import os
+import unittest
+
 try:
     import pycodestyle
 except ImportError:
     print('Try `python3 -m pip install pycodestyle`')
     raise
 
-pycodestyle.StyleGuide(paths=['--max-line-length=99']).check_files([os.path.dirname(__file__)])
+
+class StyleTestCase(unittest.TestCase):
+    def test_python_style(self):
+        pycodestyle.StyleGuide(
+            paths=['--max-line-length=99']).check_files([os.path.dirname(__file__)])
+
+
+if __name__ == '__main__':
+    logging.basicConfig(format='%(levelname)s:  %(message)s', level='WARNING')
+    unittest.main()
diff --git a/test_ranked_match.py b/test_ranked_match.py