Skip to content

Commit 9f77084

Browse files
committed
vignette updates
1 parent cefd1cd commit 9f77084

4 files changed

+58
-91
lines changed

vignettes/citation-networks.Rmd.orig

+44-46
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,15 @@ vignette: >
99

1010
```{r, include = FALSE}
1111
knitr::opts_chunk$set(
12-
collapse = TRUE,
13-
comment = "#>",
12+
collapse = TRUE,
13+
comment = "#>",
1414
warning = FALSE,
1515
message = FALSE,
16-
dev = "png",
1716
fig.cap = ""
1817
)
1918
```
2019
> Important API Change
21-
>
20+
>
2221
> The new version of the API requires an API key, or all of your requests will be blocked. See the [API Changes](api-changes.html#an-api-key-is-required-1) page.
2322

2423

@@ -33,7 +32,6 @@ library(visNetwork)
3332
library(magrittr)
3433
library(stringr)
3534
library(knitr)
36-
library(webshot2)
3735

3836
# Write a query to pull patents assigned to the CPC code of "Y10S707/933"
3937
query <- qry_funs$contains(cpc_current.cpc_group_id = "Y10S707/933")
@@ -89,17 +87,17 @@ pat_title <- function(title, number) {
8987

9088
edges <-
9189
res_lst$us_patent_citations %>%
92-
semi_join(x = ., y = ., by = c("citation_patent_id" = "patent_id")) %>%
93-
select(-citation_sequence) %>% # discard citation_sequence that we don't need here
94-
set_colnames(c("from", "to"))
90+
semi_join(x = ., y = ., by = c("citation_patent_id" = "patent_id")) %>%
91+
select(-citation_sequence) %>% # discard citation_sequence that we don't need here
92+
set_colnames(c("from", "to"))
9593

9694
nodes <-
9795
pat_lst$patents %>%
98-
mutate(
99-
id = patent_id,
100-
label = patent_id,
101-
title = pat_title(patent_title, patent_id)
102-
)
96+
mutate(
97+
id = patent_id,
98+
label = patent_id,
99+
title = pat_title(patent_title, patent_id)
100+
)
103101

104102
visNetwork(
105103
nodes = nodes, edges = edges, height = "400px", width = "100%",
@@ -121,14 +119,14 @@ With only 3 patents, it will probably be possible to visualize how these patents
121119
```{r}
122120
rel_pats <-
123121
res_lst$us_patent_citations %>%
124-
rbind(setNames(res_lst$us_patent_citations, names(.))) %>%
125-
select(-patent_id) %>%
126-
rename(patent_id = citation_patent_id) %>%
127-
bind_rows(data.frame(patent_id = p3)) %>%
128-
distinct() %>%
129-
filter(!is.na(patent_id))
130-
131-
# Look up which patents the relevant patents cite. We need to use the
122+
rbind(setNames(res_lst$us_patent_citations, names(.))) %>%
123+
select(-patent_id) %>%
124+
rename(patent_id = citation_patent_id) %>%
125+
bind_rows(data.frame(patent_id = p3)) %>%
126+
distinct() %>%
127+
filter(!is.na(patent_id))
128+
129+
# Look up which patents the relevant patents cite. We need to use the
132130
# patent_citation endpoint now.
133131

134132
rel_pats_res <- search_pv(
@@ -150,20 +148,20 @@ cited_pats <-
150148

151149
full_network <-
152150
cited_pats %>%
153-
do({
154-
.$ind <-
151+
do({
152+
.$ind <-
153+
group_by(., patent_id) %>%
154+
group_indices()
155155
group_by(., patent_id) %>%
156-
group_indices()
157-
group_by(., patent_id) %>%
158-
mutate(sqrt_num_cited = sqrt(n()))
159-
}) %>%
160-
inner_join(x = ., y = ., by = "citation_patent_id") %>%
161-
filter(ind.x > ind.y) %>%
162-
group_by(patent_id.x, patent_id.y) %>%
163-
mutate(cosine_sim = n() / (sqrt_num_cited.x * sqrt_num_cited.y)) %>%
164-
ungroup() %>%
165-
select(matches("patent_id\\.|cosine_sim")) %>%
166-
distinct()
156+
mutate(sqrt_num_cited = sqrt(n()))
157+
}) %>%
158+
inner_join(x = ., y = ., by = "citation_patent_id") %>%
159+
filter(ind.x > ind.y) %>%
160+
group_by(patent_id.x, patent_id.y) %>%
161+
mutate(cosine_sim = n() / (sqrt_num_cited.x * sqrt_num_cited.y)) %>%
162+
ungroup() %>%
163+
select(matches("patent_id\\.|cosine_sim")) %>%
164+
distinct()
167165

168166
kable(head(full_network))
169167
```
@@ -183,21 +181,21 @@ There appears to be a smallish group of patent pairs that are very similar to on
183181
```{r citationvis2}
184182
edges <-
185183
full_network %>%
186-
filter(cosine_sim >= .1) %>%
187-
rename(from = patent_id.x, to = patent_id.y, value = cosine_sim) %>%
188-
mutate(title = paste("Cosine similarity =", as.character(round(value, 3))))
184+
filter(cosine_sim >= .1) %>%
185+
rename(from = patent_id.x, to = patent_id.y, value = cosine_sim) %>%
186+
mutate(title = paste("Cosine similarity =", as.character(round(value, 3))))
189187

190188
nodes <-
191189
rel_pats_lst$us_patent_citations %>%
192-
distinct(patent_id) %>%
193-
rename(id = patent_id) %>%
194-
mutate(
195-
# the 3 patents of interest will be represented as blue nodes, all others
196-
# will be yellow
197-
color = ifelse(id %in% p3, "#97C2FC", "#DDCC77"),
198-
label = id,
199-
title = pat_title(id, id) # we don't get patent_title now (formerly first argument)
200-
)
190+
distinct(patent_id) %>%
191+
rename(id = patent_id) %>%
192+
mutate(
193+
# the 3 patents of interest will be represented as blue nodes, all others
194+
# will be yellow
195+
color = ifelse(id %in% p3, "#97C2FC", "#DDCC77"),
196+
label = id,
197+
title = pat_title(id, id) # we don't get patent_title now (formerly first argument)
198+
)
201199

202200
visNetwork(
203201
nodes = nodes, edges = edges, height = "700px", width = "100%",

vignettes/examples.Rmd.orig

+8-5
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ knitr::opts_chunk$set(
1414
)
1515
```
1616

17-
With the recent [API changes](api-changes.html), the patent endpoint is the main way to retrieve data. The other
17+
With the recent [API changes](api-changes.html), the patent endpoint is the main way to retrieve data. The other
1818
endpoints supply additional information. Also note that an API key is required.
1919

2020
## Patent endpoint
@@ -24,7 +24,9 @@ Which patents have been cited by more than 500 US patents?
2424
```{r}
2525
library(patentsview)
2626

27-
search_pv(query = qry_funs$gt(patent_num_times_cited_by_us_patents = 500))
27+
fields = c("patent_id", "patent_title", "patent_date")
28+
search_pv(query = qry_funs$gt(patent_num_times_cited_by_us_patents = 500),
29+
fields = fields)
2830
```
2931

3032
How many distinct inventors are represented by these highly-cited patents?
@@ -33,7 +35,7 @@ How many distinct inventors are represented by these highly-cited patents?
3335

3436
search_pv(
3537
query = qry_funs$gt(patent_num_times_cited_by_us_patents = 500),
36-
fields = c("patent_id", "inventors.inventor_id")
38+
fields = c("patent_id", "inventors")
3739
)
3840
```
3941

@@ -84,7 +86,8 @@ Which inventors' most recent patents have Chicago, IL listed as their location?
8486
pv_out <- search_pv(
8587
query = '{"_and":[{"_text_phrase": {"inventor_lastknown_city":"Chicago"}},
8688
{"_text_phrase": {"inventor_lastknown_state":"IL"}}]}',
87-
endpoint = "inventor"
89+
endpoint = "inventor",
90+
fields = c("inventor_id", "inventor_name_first", "inventor_name_last")
8891
)
8992

9093
pv_out
@@ -119,7 +122,7 @@ Which assignees' organizations start with Microsoft?
119122

120123
```{r}
121124
query <- qry_funs$begins(assignee_organization = "Microsoft")
122-
fields <- c("assignee_id", "assignee_organization")
125+
fields <- c("assignees")
123126
pv_out <- search_pv(query, fields = fields, endpoint = "assignee")
124127
pv_out$data$assignees$assignee_organization
125128
```

vignettes/getting-started.Rmd.orig

+5-38
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ library(patentsview)
2828

2929
search_pv(
3030
query = '{"_gte":{"patent_date":"2007-01-01"}}',
31-
endpoint = "patent"
31+
endpoint = "patent",
32+
fields = c("patent_id", "patent_title", "patent_date")
3233
)
3334
```
3435

@@ -66,10 +67,11 @@ Check out the [writing queries vignette](writing-queries.html) for more details
6667

6768
## Fields
6869

69-
Each endpoint has a different set of fields. The new version of the API allows all fields to be queried. In the first example, we didn't specify which fields we wanted to retrieve so we were given the default set. You can specify which fields you want using the `fields` argument:
70+
Each endpoint has a different set of fields. The new version of the API allows all fields to be queried. You can specify which fields you want using the `fields` argument. If you don't specify any, you will get
71+
the primary key(s) for the specified endpoint.
7072

7173
```{r}
72-
# search_pv defaults the endpoint parameter to "patents" if not specified
74+
# search_pv defaults the endpoint parameter to "patent" if not specified
7375
result = search_pv(
7476
query = '{"_gte":{"patent_date":"2007-01-01"}}',
7577
fields = c("patent_id", "patent_title")
@@ -153,41 +155,6 @@ Your choice of endpoint determines two things:
153155

154156
Most of the time you will want to use the patent endpoint. Note that you can still effectively filter on fields that are not at the patent-level when using the patent endpoint (e.g., you can filter on assignee name or CPC category). This is because patents are relatively low-level entities. For higher level entities like assignees, if you filter on a field that is not at the assignee-level (e.g., inventor name), the API will return data on any assignee that has at least one inventor whose name matches your search, which is probably not what you want.
155157

156-
## Casting fields
157-
158-
The API now returns most data fields as an appropriate data type (e.g., numeric). There are, however,
159-
a few fields that are returned as strings rather than integers and there are 33 date fields now.
160-
There are also two rule_47_flags, one returned by the patent endpoint, the other by the publication endpoint.
161-
The former is a boolean while the latter is a string that can be cast to a boolean. Lastly, most
162-
of the document numbers are numeric though there are two instances where they are strings that can
163-
be cast to integers. You can cast all fields to their preferred R types using `cast_pv_data()`:
164-
165-
```{r}
166-
library(knitr)
167-
168-
# These fields are received as strings and would be cast to integers
169-
ints <- fieldsdf[fieldsdf$data_type == "int", c("endpoint","field")]
170-
ints <- ints[order(ints$field),]
171-
print(ints, row.names = FALSE)
172-
173-
# These fields are received as strings and would be cast to dates
174-
date_fields <- fieldsdf[fieldsdf$data_type == "date", c("endpoint","field")]
175-
kable(date_fields, row.names = FALSE)
176-
177-
res <- search_pv(
178-
query = '{"patent_id":"5116621"}',
179-
fields = c("patent_id", "patent_date", "patent_title", "patent_year", "assignees.assignee_type")
180-
)
181-
182-
# unnest_pv_data shows that the assignee_type field is received as a string and patent_year
183-
# is received as an integer
184-
unnest_pv_data(res$data)
185-
186-
# cast_pv_data will convert the string patent_date to an R date and assignee_type to an integer
187-
recast <- cast_pv_data(res$data)
188-
unnest_pv_data(recast)
189-
```
190-
191158
## FAQs
192159

193160
#### I'm sure my query is well-formatted and correct, but I keep getting an error. What's the deal?

vignettes/top-assignees.Rmd.orig

+1-2
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,7 @@ query
6666
fields <- c(
6767
"patent_id", "patent_date", "patent_year", "patent_earliest_application_date",
6868
"patent_num_us_patents_cited", "application.filing_date",
69-
"assignees.assignee_organization",
70-
"assignees.assignee_id" # the assignee fields come back in a nested object
69+
"assignees" # the assignee fields come back in a nested object
7170
)
7271

7372
# Send an HTTP request to the PatentsView API to get the data

0 commit comments

Comments
 (0)