Skip to content

Commit b1e62bc

Browse files
alexottalenegro81
authored and committed
code cleanup, adding READMEs, etc.
1 parent d5017b5 commit b1e62bc

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+171
-131
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,6 @@
33
/ch11/masc_word_sense_sentence_corpus.V1.0/
44
/.venv/
55
/.idea/
6+
/ch06/imports/yoochoose/dataset-README.txt
7+
ch06/imports/yoochoose/yoochoose-*.dat
8+
/ch06/imports/yoochoose/yoochoose-data.7z

__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__all__ = ["util", "ch12"]

ch04/imports/movielens/README.md

+15-11
Original file line numberDiff line numberDiff line change
@@ -4,38 +4,42 @@
44
The import scripts require some specific libraries, such as imdbpy, which provides access to the IMDb (https://www.imdb.com/) API.
55
To install what is necessary run:
66

7-
`#make`
7+
```sh
8+
make
9+
```
810

911
## Download the data source
1012
The Makefile also contains the command to download the necessary data sources.
1113
Run:
1214

13-
`#make source`
15+
```sh
16+
make source
17+
```
1418

15-
If you would like to download it manually the URL is:
16-
17-
http://files.grouplens.org/datasets/movielens/ml-latest-small.zip
19+
You can also download it manually from [project's site](http://files.grouplens.org/datasets/movielens/ml-latest-small.zip)
1820

1921
The default location is in the home of this code repository in the directory datasets
20-
(eventually in the movielens subdirectory).
22+
(eventually in the `movielens` subdirectory).
2123

2224

2325
## Run the import
2426

25-
`python import_movielens.py -u <neo4j username> -p <password> -b <bolt uri> -s <source directory>`
27+
```sh
28+
python import_movielens.py -u <neo4j username> -p <password> -b <bolt uri> -s <source directory>
29+
```
2630

2731
If you used the Makefile to download the data, you don't need to specify the source directory.
2832
The simple version takes a while to complete. I recommend running the parallel version as follows:
2933

30-
`python import_movielens_parallel.py -u <neo4j username> -p <password> -b <bolt uri> -s <source directory>`
34+
```sh
35+
python import_movielens_parallel.py -u <neo4j username> -p <password> -b <bolt uri> -s <source directory>
36+
```
3137

3238
## Note during the import
3339

3440
Note that IMDB imposes some constraints for the access to its API. Due to this, if the machine is powerful enough
3541
it can happen that it will start rejecting the requests. It is perfectly normal.
3642

37-
After the chapter has been released the full version of the IMDB has been released here:
38-
39-
https://www.imdb.com/interfaces/
43+
After the chapter was released, the full version of the IMDb dataset was made available [here](https://www.imdb.com/interfaces/).
4044

4145
In the future I'll make some changes in order to load from files instead.
+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
neo4j==4.2.0
1+
neo4j>=4.0,<4.3
22
imdbpy==2020.9.25
33
nose==1.3.7

ch04/recommendation/content_based_recommendation_second_approach.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def __init__(self, uri, user, password):
2222
def recommend_to(self, userId, k):
2323
user_VSM = self.get_user_vector(userId)
2424
movies_VSM, titles = self.get_movie_vectors(userId)
25-
top_k = self.compute_top_k (user_VSM, movies_VSM, k);
25+
top_k = self.compute_top_k (user_VSM, movies_VSM, k)
2626
results = []
2727
for movie in top_k:
2828
item = {}
@@ -92,8 +92,8 @@ def get_movie_vectors(self, user_id):
9292

9393
i = 0
9494
for movie in tx.run(list_of_moview_query, {"userId": user_id}):
95-
movie_id = movie["movieId"];
96-
title = movie["title"];
95+
movie_id = movie["movieId"]
96+
title = movie["title"]
9797
vector = tx.run(query, {"movieId": movie_id})
9898
movies_VSM[movie_id] = vector.single()[0]
9999
titles[movie_id] = title
@@ -115,6 +115,6 @@ def get_movie_vectors(self, user_id):
115115
print(__file__ , "Specify the user with -t <user id>")
116116
print("Setting the default to:", target_user)
117117
recommender = ContentBasedRecommenderSecondApproach(uri=uri, user=neo4j_user, password=neo4j_password)
118-
top10 = recommender.recommend_to(target_user, 10); #Replace 598 with any other user id you are interested in
118+
top10 = recommender.recommend_to(target_user, 10) #Replace 598 with any other user id you are interested in
119119
print(top10)
120120

ch04/recommendation/content_based_recommendation_third_approach.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def compute_and_store_similarity(self):
1212
movies_VSM = self.get_movie_vectors()
1313
i = 0
1414
for movie in movies_VSM:
15-
knn = self.compute_knn(movie, movies_VSM.copy(), 10);
15+
knn = self.compute_knn(movie, movies_VSM.copy(), 10)
1616
self.store_knn(movie, knn)
1717
# would be useful to add a progress bar here as well...
1818
i += 1
@@ -53,7 +53,7 @@ def get_movie_vectors(self):
5353

5454
i = 0
5555
for movie in tx.run(list_of_moview_query):
56-
movie_id = movie["movieId"];
56+
movie_id = movie["movieId"]
5757
vector = tx.run(query, {"movieId": movie_id})
5858
movies_VSM[movie_id] = vector.single()[0]
5959
i += 1
@@ -110,6 +110,6 @@ def recommendTo(self, user_id, k):
110110
uri = "bolt://localhost:7687"
111111
recommender = ContentBasedRecommender(uri=uri, user="neo4j", password="pippo1")
112112
# would be nice to have a control of execution - like, recalculate everything only if specific flag is set, or something like
113-
recommender.compute_and_store_similarity();
114-
top10 = recommender.recommendTo("598", 10);
113+
recommender.compute_and_store_similarity()
114+
top10 = recommender.recommendTo("598", 10)
115115
print(top10)

ch05/imports/retail_rocket/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ pip install -r requirements.txt
1818
## Download & import the dataset
1919

2020

21-
For this chapter we're using the [Retailrocket recommender system dataset](https://www.kaggle.com/retailrocket/ecommerce-dataset?select=item_properties_part1.csv) available at Kaggle. From this dataset we need only the `events.csv` file (you can download the whole dataset), download it and put it into some directory.
21+
For this chapter we're using the [Retailrocket recommender system dataset](https://www.kaggle.com/retailrocket/ecommerce-dataset) available from Kaggle. From this dataset we need only the `events.csv` file (you can also download the whole dataset). Download it and put it into some directory.
2222

2323
Importing of data is performed with following command (you may need to update Neo4j username & password in the file):
2424

ch05/imports/retail_rocket/import_retail_rocket_ui.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from neo4j import GraphDatabase
44
import sys
55

6+
67
class RetailRocketImporter(object):
78

89
def __init__(self, uri, user, password):
@@ -16,7 +17,7 @@ def executeNoException(self, session, query):
1617
session.run(query)
1718
except Exception as e:
1819
pass
19-
20+
2021
def import_user_item(self, file):
2122
with open(file, 'r+') as in_file:
2223
reader = csv.reader(in_file, delimiter=',')
@@ -27,8 +28,8 @@ def import_user_item(self, file):
2728
self.executeNoException(session, "CREATE CONSTRAINT ON (u:Item) ASSERT u.itemId IS UNIQUE")
2829

2930
tx = session.begin_transaction()
30-
i = 0;
31-
j = 0;
31+
i = 0
32+
j = 0
3233
query = """
3334
MERGE (item:Item {itemId: $itemId})
3435
MERGE (user:User {userId: $userId})
@@ -43,7 +44,7 @@ def import_user_item(self, file):
4344
item_id = strip(row[3])
4445

4546
if event_type == "transaction":
46-
tx.run(query, {"itemId":item_id, "userId": user_id, "timestamp": timestamp})
47+
tx.run(query, {"itemId": item_id, "userId": user_id, "timestamp": timestamp})
4748
i += 1
4849
j += 1
4950
if i == 1000:
@@ -59,13 +60,14 @@ def import_user_item(self, file):
5960

6061
def strip(string):
    """Return `string` with each character whose code point is outside 1..127 replaced by a space."""
    kept = []
    for ch in string:
        # Keep 7-bit ASCII characters (NUL excluded); blank out everything else.
        kept.append(ch if 1 <= ord(ch) <= 127 else ' ')
    return ''.join(kept)
6162

63+
6264
if __name__ == '__main__':
6365
start = time.time()
6466
uri = "bolt://localhost:7687"
6567
user = "neo4j"
66-
password = "q1" # pippo1
68+
password = "q1" # pippo1
6769
file_path = "/Users/ale/neo4j-servers/gpml/dataset/retailrocket-recommender-system-dataset/events.csv"
68-
if (len(sys.argv) > 1):
70+
if len(sys.argv) > 1:
6971
file_path = sys.argv[1]
7072
importing = RetailRocketImporter(uri=uri, user=user, password=password)
7173
importing.import_user_item(file=file_path)
+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
neo4j
1+
neo4j>=4.0,<4.3

ch05/recommendation/collaborative_filtering/recommender.py

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import itertools
21
from enum import Enum
32
from typing import Dict, List
43

ch05/recommendation/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
neo4j
1+
neo4j>=4.0,<4.3

ch06/imports/yoochoose/Makefile

+6-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,12 @@ init:
22
pip install -U -r requirements.txt
33

44
get_data:
5-
curl -L -o yoochoose-data.7z https://s3-eu-west-1.amazonaws.com/yc-rdata/yoochoose-data.7z
5+
if [ ! -f yoochoose-data.7z ]; then \
6+
curl -L -o yoochoose-data.7z https://s3-eu-west-1.amazonaws.com/yc-rdata/yoochoose-data.7z ;\
7+
fi; \
8+
if [ ! -f yoochoose-buys.dat ]; then \
9+
7z x yoochoose-data.7z; \
10+
fi
611

712
test:
813
nosetests tests

ch06/imports/yoochoose/README.md

+34-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,34 @@
1-
make
1+
This directory contains the code for importing the data used in chapter 6 of the book.
2+
3+
## Install dependencies
4+
5+
To install all necessary dependencies, just run:
6+
7+
```sh
8+
make
9+
```
10+
11+
or
12+
13+
```sh
14+
pip install -r requirements.txt
15+
```
16+
17+
18+
## Download & import the dataset
19+
20+
21+
For this chapter we're using the [YooChoose dataset](https://s3-eu-west-1.amazonaws.com/yc-rdata/yoochoose-data.7z). To download and unpack it, just type (you need to have the 7-Zip binary installed):
22+
23+
```sh
24+
make get_data
25+
```
26+
27+
The data is imported with the following command (you may need to update the Neo4j username & password in the file):
28+
29+
```sh
30+
python import_yoochoose.py path_to_youchoose_dataset
31+
32+
```
33+
34+
If you used `make get_data`, use `.` for `path_to_youchoose_dataset`

ch06/imports/yoochoose/import_yoochoose.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def import_session_data(self, file):
2424
self.executeNoException(session, "CREATE CONSTRAINT ON (s:Session) ASSERT s.sessionId IS UNIQUE")
2525
self.executeNoException(session, "CREATE CONSTRAINT ON (i:Item) ASSERT i.itemId IS UNIQUE")
2626
dtype = {"sessionID": np.int64, "itemID": np.int64, "category": np.object}
27-
j = 0;
27+
j = 0
2828
for chunk in pd.read_csv(file,
2929
header=0,
3030
dtype=dtype,
@@ -33,7 +33,7 @@ def import_session_data(self, file):
3333
chunksize=10**6):
3434
df = chunk
3535
tx = session.begin_transaction()
36-
i = 0;
36+
i = 0
3737
query = """
3838
MERGE (session:Session {sessionId: $sessionId})
3939
MERGE (item:Item {itemId: $itemId, category: $category})
@@ -68,7 +68,7 @@ def import_session_data(self, file):
6868
def import_buys_data(self, file):
6969
with self._driver.session() as session:
7070
dtype = {"sessionID": np.int64, "itemID": np.int64, "price": np.float, "quantity": np.int}
71-
j = 0;
71+
j = 0
7272
for chunk in pd.read_csv(file,
7373
header=0,
7474
dtype=dtype,
@@ -77,7 +77,7 @@ def import_buys_data(self, file):
7777
chunksize=10**6):
7878
df = chunk
7979
tx = session.begin_transaction()
80-
i = 0;
80+
i = 0
8181
query = """
8282
MATCH (session:Session {sessionId: $sessionId})
8383
MATCH (item:Item {itemId: $itemId})
@@ -115,7 +115,7 @@ def strip(string): return ''.join([c if 0 < ord(c) < 128 else ' ' for c in strin
115115
user = "neo4j"
116116
password = "q1" # pippo1
117117
base_path = "/Users/ale/neo4j-servers/gpml/dataset/yoochoose-data"
118-
if (len(sys.argv) > 1):
118+
if len(sys.argv) > 1:
119119
base_path = sys.argv[1]
120120
importing = YoochooseImporter(uri=uri, user=user, password=password)
121121
importing.import_session_data(file=base_path + "/yoochoose-clicks.dat")

ch06/imports/yoochoose/import_yoochoose_advanced.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def executeNoException(self, session, query):
2222

2323
def import_session_data(self, file):
2424
dtype = {"sessionID": np.int64, "itemID": np.int64, "category": np.object}
25-
j = 0;
25+
j = 0
2626
sess_clicks = {}
2727
for chunk in pd.read_csv(file,
2828
header=0,
@@ -66,7 +66,7 @@ def import_session_data(self, file):
6666
self.executeNoException(session, "CREATE CONSTRAINT ON (i:Item) ASSERT i.itemId IS UNIQUE")
6767

6868
tx = session.begin_transaction()
69-
i = 0;
69+
i = 0
7070
j = 0
7171
query = """
7272
CREATE (session:Session {sessionId: $sessionId})
@@ -101,8 +101,8 @@ def import_session_data(self, file):
101101
def import_buys_data(self, file, sess_clicks):
102102
with self._driver.session() as session:
103103
dtype = {"sessionID": np.int64, "itemID": np.int64, "price": np.float, "quantity": np.int}
104-
i = 0;
105-
j = 0;
104+
i = 0
105+
j = 0
106106
query = """
107107
MATCH (session:Session {sessionId: $sessionId})
108108
MATCH (item:Item {itemId: $itemId})
@@ -146,8 +146,8 @@ def post_processing(self, sess_clicks):
146146
print("start post processing")
147147
with self._driver.session() as session:
148148
tx = session.begin_transaction()
149-
i = 0;
150-
j = 0;
149+
i = 0
150+
j = 0
151151
post_processing_query = """
152152
MATCH (s:Session {sessionId: $sessionId})-[:CONTAINS]->(click)
153153
WITH s, click
@@ -185,7 +185,7 @@ def strip(string): return ''.join([c if 0 < ord(c) < 128 else ' ' for c in strin
185185
user = "neo4j"
186186
password = "q1" # pippo1
187187
base_path = "/Users/ale/neo4j-servers/gpml/dataset/yoochoose-data"
188-
if (len(sys.argv) > 1):
188+
if len(sys.argv) > 1:
189189
base_path = sys.argv[1]
190190
importer = YoochooseImporter(uri=uri, user=user, password=password)
191191

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
neo4j
1+
neo4j>=4.0,<4.3
22
pandas

ch06/recommendation/requirements.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1-
neo4j
1+
neo4j>=4.0,<4.3
22
pandas
33
sklearn
4+
annoy

ch06/recommendation/session_based_recommendation_iknn_approach.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def close(self):
1515
def compute_and_store_similarity(self):
1616
items_VSM = self.get_item_vectors()
1717
for item in items_VSM:
18-
knn = self.compute_knn(item, items_VSM.copy(), 20);
18+
knn = self.compute_knn(item, items_VSM.copy(), 20)
1919
self.store_knn(item, knn)
2020

2121
def compute_knn(self, item, items, k):
@@ -47,7 +47,7 @@ def get_item_vectors(self):
4747
with self._driver.session() as session:
4848
i = 0
4949
for item in session.run(list_of_items_query):
50-
item_id = item["itemId"];
50+
item_id = item["itemId"]
5151
vector = session.run(query, {"itemId": item_id})
5252
items_VSM_sparse[item_id] = vector.single()[0]
5353
i += 1
@@ -100,7 +100,7 @@ def recommend_to(self, item_id, k):
100100
user = "neo4j"
101101
password = "q1" # pippo1
102102
recommender = SessionBasedRecommender(uri=uri, user=user, password=password)
103-
recommender.compute_and_store_similarity();
104-
top10 = recommender.recommend_to(214842060, 10);
103+
recommender.compute_and_store_similarity()
104+
top10 = recommender.recommend_to(214842060, 10)
105105
recommender.close()
106106
print(top10)

0 commit comments

Comments
 (0)