Skip to content

Commit 7c965c0

Browse files
committed
metadata and regional bucket
1 parent b03f3ee commit 7c965c0

File tree

5 files changed

+42
-9
lines changed

5 files changed

+42
-9
lines changed

dataflow-deploy.sh

+17-5
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,19 @@
1-
mvn compile exec:java -Dexec.mainClass=com.solubris.Scrape \
2-
-Dexec.args="--runner=DataflowRunner --project=explore-447815 \
1+
#!/usr/bin/env bash
2+
3+
source env.sh
4+
5+
gcloud storage buckets create --project="$GCP_PROJECT" --location="$GCP_REGION" "$GCP_BUCKET" # bucket lives in $GCP_REGION; holds tmp files, template and metadata
6+
gcloud storage cp metadata.json "$GCP_BUCKET/scrape-1.0.0_metadata"
7+
8+
mvn clean compile exec:java -Dexec.mainClass=com.solubris.Scrape \
9+
-Dexec.args="--runner=DataflowRunner --project=$GCP_PROJECT \
310
--url=https://www.rhs.org.uk/shows-events/rhs-chelsea-flower-show/ticket-options \
4-
--gcpTempLocation=gs://explore-temp-bucket/tmp \
5-
--templateLocation=gs://explore-temp-bucket/scrape-1.0.0 \
6-
--region=europe-west1" \
11+
--gcpTempLocation=$GCP_BUCKET/tmp \
12+
--region=$GCP_REGION \
13+
--templateLocation=$GCP_BUCKET/scrape-1.0.0" \
714
-Pdataflow-runner
15+
16+
#gcloud storage cp metadata.json gs://explore-temp-bucket/scrape-1.0.0_metadata
17+
#gcloud auth login
18+
#gcloud config set project PROJECT_ID
19+

dataflow.sh

+8-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
1-
mvn compile exec:java -Dexec.mainClass=com.solubris.Scrape \
2-
-Dexec.args="--runner=DataflowRunner --project=explore-447815 \
1+
#!/usr/bin/env bash
2+
3+
source env.sh
4+
5+
mvn clean compile exec:java -Dexec.mainClass=com.solubris.Scrape \
6+
-Dexec.args="--runner=DataflowRunner --project=$GCP_PROJECT \
37
--url=https://www.rhs.org.uk/shows-events/rhs-chelsea-flower-show/ticket-options \
4-
--gcpTempLocation=gs://explore-temp-bucket/tmp \
5-
--region=europe-west1" \
8+
--gcpTempLocation=$GCP_BUCKET/tmp \
9+
--region=$GCP_REGION" \
610
-Pdataflow-runner

direct.sh

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#!/usr/bin/env bash
2+
13
mvn compile exec:java -Dexec.mainClass=com.solubris.Scrape \
24
-Dexec.args="--runner=DirectRunner \
35
--url=https://www.rhs.org.uk/shows-events/rhs-chelsea-flower-show/ticket-options" \

env.sh

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
export GCP_PROJECT=explore-447815
2+
export GCP_REGION=europe-west2
3+
export GCP_BUCKET="gs://explore-$GCP_REGION"

metadata.json

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"description": "Scrapes the given URL",
3+
"name": "scrape-rhs-new",
4+
"streaming": false,
5+
"parameters": [
6+
{
7+
"name": "url",
8+
"helpText": "URL to be scraped",
9+
"label": "URL to be scraped"
10+
}
11+
]
12+
}

0 commit comments

Comments
 (0)