Skip to content

Commit 42537a9

Browse files
author
peterrr
committed
Code zum Erzeugen des Graphen
1 parent 2691d3e commit 42537a9

File tree

1 file changed

+53
-0
lines changed

1 file changed

+53
-0
lines changed

src/create_categorie_graph.R

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#' Build a category co-occurrence edge list for a Wikipedia category
#'
#' Fetches the pages of `ctgry` via `get_pages()`, looks up the categories of
#' every page via `get_page_categories()`, and connects each pair of
#' categories that appear together on the same page.
#'
#' @param ctgry Category name, passed unchanged to `get_pages()`.
#' @return A data frame with columns `X1` and `X2`, one row per distinct
#'   category pair. It has zero rows when no page carries at least two
#'   categories (including when the category has no pages at all).
create_category_graph <- function(ctgry) {
  pages <- get_pages(ctgry)

  # lapply() copes with an empty page list, unlike `1:length(pages)`, which
  # would iterate over c(1, 0) and index past the end.
  cats_page_list <- lapply(pages, get_page_categories)

  graph_df <- data.frame(X1 = numeric(0), X2 = numeric(0))
  for (cats in cats_page_list) {
    # A page needs at least two categories to contribute an edge.
    if (length(cats) <= 1) {
      next
    }

    # combn() returns one pair per column; transpose to one pair per row.
    edge_combs <- combn(unlist(cats), 2)
    edge_df <- data.frame(
      matrix(t(edge_combs), ncol = 2),
      stringsAsFactors = FALSE
    )

    # Full outer join on both columns: acts as a de-duplicating union of the
    # edges collected so far and the edges of this page.
    graph_df <- merge(graph_df, edge_df, all = TRUE)
  }

  graph_df
}
26+
27+
#' List the page ids that belong to a Wikipedia category
#'
#' Queries the English Wikipedia API (`list=categorymembers`, `cmtype=page`)
#' for the members of `Category:<ctgry>`.
#'
#' Only a single request with `cmlimit=100` is made and no continuation is
#' followed, so at most the first 100 members are returned.
#'
#' @param ctgry Category name without the `Category:` prefix. It is
#'   percent-encoded before being placed in the query string, so names with
#'   spaces, `&`, etc. are safe to pass.
#' @return An unnamed list of page ids; empty when the category has no
#'   members.
get_pages <- function(ctgry) {
  # Encode the name so reserved characters cannot corrupt the query string.
  # URLencode() leaves input that already looks percent-encoded untouched.
  cat_url <- paste0(
    "http://en.wikipedia.org/w/api.php?action=query&list=categorymembers&cmlimit=100&cmtype=page&format=json&cmtitle=Category:",
    URLencode(enc2utf8(ctgry), reserved = TRUE)
  )

  # NOTE(review): assumes fromJSON() returns nested lists (one list per
  # member), not a simplified data frame - confirm which JSON package is used.
  pages <- fromJSON(cat_url)$query$categorymembers

  # lapply() handles an empty member list; `1:length(pages)` did not.
  page_list <- lapply(unname(pages), function(page) page$pageid)

  # Members without a pageid are skipped, matching the previous behaviour.
  page_list[!vapply(page_list, is.null, logical(1))]
}
40+
41+
#' List the visible categories of a Wikipedia page
#'
#' Queries the English Wikipedia API (`prop=categories`, `clshow=!hidden`) for
#' the page with id `pg_id` and strips the namespace prefix from each
#' category title.
#'
#' The request sets no `cllimit`, so the API's default page size applies and
#' no continuation is followed.
#'
#' @param pg_id Page id (numeric or character).
#' @return An unnamed list of category names without the leading
#'   `Category:` prefix; empty when the page has no visible categories.
get_page_categories <- function(pg_id) {
  # format() keeps large numeric ids in plain digits; pasting 100000 directly
  # would yield "1e+05" and an invalid request.
  page_cat_url <- paste0(
    "http://en.wikipedia.org/w/api.php?action=query&prop=categories&clshow=!hidden&format=json&pageids=",
    format(pg_id, scientific = FALSE, trim = TRUE)
  )

  page_cats <- fromJSON(page_cat_url)$query$pages[[1]]$categories

  # Remove only the namespace prefix (everything up to the first colon) so
  # that names which themselves contain a colon are kept whole. lapply() also
  # returns an empty list when the page has no categories.
  lapply(unname(page_cats), function(page_cat) {
    sub("^[^:]*:", "", page_cat$title)
  })
}

0 commit comments

Comments
 (0)