|
1 | 1 | # -*- coding: utf-8 -*- |
2 | 2 | """ |
3 | | -Capitol Words |
4 | | -------------- |
| 3 | +Capitol Words Congressional speeches |
| 4 | +------------------------------------ |
5 | 5 |
|
6 | 6 | A collection of ~11k (almost all) speeches given by the main protagonists of the |
7 | 7 | 2016 U.S. Presidential election who had previously served in the U.S. Congress -- |
@@ -53,31 +53,31 @@ class CapitolWords(Dataset): |
53 | 53 | Download the data (one time only!) from the textacy-data repo |
54 | 54 | (https://github.com/bdewilde/textacy-data), and save its contents to disk:: |
55 | 55 |
|
56 | | - >>> cw = CapitolWords() |
57 | | - >>> cw.download() |
58 | | - >>> cw.info |
| 56 | + >>> ds = CapitolWords() |
| 57 | + >>> ds.download() |
| 58 | + >>> ds.info |
59 | 59 | {'name': 'capitol_words', |
60 | 60 | 'site_url': 'http://sunlightlabs.github.io/Capitol-Words/', |
61 | 61 | 'description': 'Collection of ~11k speeches in the Congressional Record given by notable U.S. politicians between Jan 1996 and Jun 2016.'} |
62 | 62 |
|
63 | 63 | Iterate over speeches as texts or records with both text and metadata:: |
64 | 64 |
|
65 | | - >>> for text in cw.texts(limit=3): |
| 65 | + >>> for text in ds.texts(limit=3): |
66 | 66 | ... print(text, end="\\n\\n") |
67 | | - >>> for text, meta in cw.records(limit=3): |
| 67 | + >>> for text, meta in ds.records(limit=3): |
68 | 68 | ... print("\\n{} ({})\\n{}".format(meta["title"], meta["speaker_name"], text)) |
69 | 69 |
|
70 | 70 | Filter speeches by a variety of metadata fields and text length:: |
71 | 71 |
|
72 | | - >>> for text, meta in cw.records(speaker_name="Bernie Sanders", limit=3): |
| 72 | + >>> for text, meta in ds.records(speaker_name="Bernie Sanders", limit=3): |
73 | 73 | ... print("\\n{}, {}\\n{}".format(meta["title"], meta["date"], text)) |
74 | | - >>> for text, meta in cw.records(speaker_party="D", congress={110, 111, 112}, |
| 74 | + >>> for text, meta in ds.records(speaker_party="D", congress={110, 111, 112}, |
75 | 75 | ... chamber="Senate", limit=3): |
76 | 76 | ... print(meta["title"], meta["speaker_name"], meta["date"]) |
77 | | - >>> for text, meta in cw.records(speaker_name={"Barack Obama", "Hillary Clinton"}, |
| 77 | + >>> for text, meta in ds.records(speaker_name={"Barack Obama", "Hillary Clinton"}, |
78 | 78 | ... date_range=("2005-01-01", "2005-12-31")): |
79 | 79 | ... print(meta["title"], meta["speaker_name"], meta["date"]) |
80 | | - >>> for text in cw.texts(min_len=50000): |
| 80 | + >>> for text in ds.texts(min_len=50000): |
81 | 81 | ... print(len(text)) |
82 | 82 |
|
83 | 83 | Stream speeches into a :class:`textacy.Corpus <textacy.corpus.Corpus>`:: |
|
0 commit comments