import yaml
Note: pass `Loader=yaml.FullLoader` explicitly to `yaml.load`. PyYAML 5.1+ prints a (harmless) warning when the argument is omitted, and PyYAML 6.0+ requires it.
# A single YAML document, kept inline as a string literal.
data = """
authors:
- Fernandez, Maria
date_added: '2007-04-27'
download: http://constant.all2all.org/~digitales/texts/fernandez_confNL.02
download_status: 404
language: Nederlands
license: Copyright
md5: 2f92214227eb6b8d8e2a6e4fc37ad6f0
projects:
- Cyberfeminist working days
- Digitales
status: Selected text
themes:
- (Cyber)feminism
- Body and technology
title: Postkoloniale Mediatheorie
type: text
url: https://www.constantvzw.org/verlag/spip.php?page=article&id_article=12&mot_filtre=4&id_lang=0&debut_source_material=0
year: '1999'
"""

# yaml.load accepts a string directly; the mapping above comes back as a dict.
d = yaml.load(data, Loader=yaml.FullLoader)
print(d)
{'authors': ['Fernandez, Maria'], 'date_added': '2007-04-27', 'download': 'http://constant.all2all.org/~digitales/texts/fernandez_confNL.02', 'download_status': 404, 'language': 'Nederlands', 'license': 'Copyright', 'md5': '2f92214227eb6b8d8e2a6e4fc37ad6f0', 'projects': ['Cyberfeminist working days', 'Digitales'], 'status': 'Selected text', 'themes': ['(Cyber)feminism', 'Body and technology'], 'title': 'Postkoloniale Mediatheorie', 'type': 'text', 'url': 'https://www.constantvzw.org/verlag/spip.php?page=article&id_article=12&mot_filtre=4&id_lang=0&debut_source_material=0', 'year': '1999'}
# Load a single YAML document from a file on disk.
# The encoding is given explicitly so the result does not depend on the
# platform's default locale.
# NOTE: FullLoader is acceptable for files you trust; for untrusted input
# prefer yaml.safe_load / Loader=yaml.SafeLoader.
with open("data.yaml", encoding="utf-8") as f:
    d = yaml.load(f, Loader=yaml.FullLoader)
print(d)
{'authors': ['Fernandez, Maria'], 'date_added': '2007-04-27', 'download': 'http://constant.all2all.org/~digitales/texts/fernandez_confNL.02', 'download_status': 404, 'language': 'Nederlands', 'license': 'Copyright', 'md5': '2f92214227eb6b8d8e2a6e4fc37ad6f0', 'projects': ['Cyberfeminist working days', 'Digitales'], 'status': 'Selected text', 'themes': ['(Cyber)feminism', 'Body and technology'], 'title': 'Postkoloniale Mediatheorie', 'type': 'text', 'url': 'https://www.constantvzw.org/verlag/spip.php?page=article&id_article=12&mot_filtre=4&id_lang=0&debut_source_material=0', 'year': '1999'}
# Several YAML documents in one string, each introduced by a "---" line.
data = """
---
authors:
- Fernandez, Maria
date_added: '2007-04-27'
download: http://constant.all2all.org/~digitales/texts/fernandez_confNL.02
download_status: 404
language: Nederlands
license: Copyright
md5: 2f92214227eb6b8d8e2a6e4fc37ad6f0
projects:
- Cyberfeminist working days
- Digitales
status: Selected text
themes:
- (Cyber)feminism
- Body and technology
title: Postkoloniale Mediatheorie
type: text
url: https://www.constantvzw.org/verlag/spip.php?page=article&id_article=12&mot_filtre=4&id_lang=0&debut_source_material=0
year: '1999'
---
authors:
- Sayyid, Salman
date_added: '2007-10-01'
download: https://www.constantvzw.org/verlag/IMG/doc/Salman_Sayyid.doc
download_status: 200
language: English
license: Creative Commons Attribution-NoDerivs
md5: 747e31a9e4209d22cc30dfda12de65ce
projects:
- Stitch and Split
published_in:
- AS 178 Selves and Territories in Science Fiction
status: Selected text
themes:
- Science (-) Fiction
title: Dune, Depolitization and Decolonizing the Future
type: text
url: https://www.constantvzw.org/verlag/spip.php?page=article&id_article=46&mot_filtre=4&id_lang=0&debut_source_material=0
year: '2005'
"""

# yaml.load_all returns a generator that yields one Python object per
# document; iterating it parses the documents one at a time.
stream = yaml.load_all(data, Loader=yaml.FullLoader)
for d in stream:
    print(d)
{'authors': ['Fernandez, Maria'], 'date_added': '2007-04-27', 'download': 'http://constant.all2all.org/~digitales/texts/fernandez_confNL.02', 'download_status': 404, 'language': 'Nederlands', 'license': 'Copyright', 'md5': '2f92214227eb6b8d8e2a6e4fc37ad6f0', 'projects': ['Cyberfeminist working days', 'Digitales'], 'status': 'Selected text', 'themes': ['(Cyber)feminism', 'Body and technology'], 'title': 'Postkoloniale Mediatheorie', 'type': 'text', 'url': 'https://www.constantvzw.org/verlag/spip.php?page=article&id_article=12&mot_filtre=4&id_lang=0&debut_source_material=0', 'year': '1999'} {'authors': ['Sayyid, Salman'], 'date_added': '2007-10-01', 'download': 'https://www.constantvzw.org/verlag/IMG/doc/Salman_Sayyid.doc', 'download_status': 200, 'language': 'English', 'license': 'Creative Commons Attribution-NoDerivs', 'md5': '747e31a9e4209d22cc30dfda12de65ce', 'projects': ['Stitch and Split'], 'published_in': ['AS 178 Selves and Territories in Science Fiction'], 'status': 'Selected text', 'themes': ['Science (-) Fiction'], 'title': 'Dune, Depolitization and Decolonizing the Future', 'type': 'text', 'url': 'https://www.constantvzw.org/verlag/spip.php?page=article&id_article=46&mot_filtre=4&id_lang=0&debut_source_material=0', 'year': '2005'}
The same thing, this time reading the documents from the file data_stream.yaml:
# Read every document from a multi-document YAML file and print each one.
# NOTE: FullLoader is acceptable for files you trust; for untrusted input
# prefer Loader=yaml.SafeLoader.
with open("data_stream.yaml", encoding="utf-8") as f:
    stream = yaml.load_all(f, Loader=yaml.FullLoader)
    # load_all is lazy: it reads from f as the loop advances, so the loop
    # has to run while the file is still open.
    for d in stream:
        print(d)
{'authors': ['Fernandez, Maria'], 'date_added': '2007-04-27', 'download': 'http://constant.all2all.org/~digitales/texts/fernandez_confNL.02', 'download_status': 404, 'language': 'Nederlands', 'license': 'Copyright', 'md5': '2f92214227eb6b8d8e2a6e4fc37ad6f0', 'projects': ['Cyberfeminist working days', 'Digitales'], 'status': 'Selected text', 'themes': ['(Cyber)feminism', 'Body and technology'], 'title': 'Postkoloniale Mediatheorie', 'type': 'text', 'url': 'https://www.constantvzw.org/verlag/spip.php?page=article&id_article=12&mot_filtre=4&id_lang=0&debut_source_material=0', 'year': '1999'} {'authors': ['Sayyid, Salman'], 'date_added': '2007-10-01', 'download': 'https://www.constantvzw.org/verlag/IMG/doc/Salman_Sayyid.doc', 'download_status': 200, 'language': 'English', 'license': 'Creative Commons Attribution-NoDerivs', 'md5': '747e31a9e4209d22cc30dfda12de65ce', 'projects': ['Stitch and Split'], 'published_in': ['AS 178 Selves and Territories in Science Fiction'], 'status': 'Selected text', 'themes': ['Science (-) Fiction'], 'title': 'Dune, Depolitization and Decolonizing the Future', 'type': 'text', 'url': 'https://www.constantvzw.org/verlag/spip.php?page=article&id_article=46&mot_filtre=4&id_lang=0&debut_source_material=0', 'year': '2005'}
You can also use `list()` to convert the generator returned by `yaml.load_all` into a list of dictionaries:
# Collect all documents of a multi-document YAML file into a list.
# NOTE: FullLoader is acceptable for files you trust; for untrusted input
# prefer Loader=yaml.SafeLoader.
with open("data_stream.yaml", encoding="utf-8") as f:
    stream = yaml.load_all(f, Loader=yaml.FullLoader)
    # Materialize the generator before the file is closed; afterwards the
    # documents live in memory and can be indexed freely.
    docs = list(stream)
print(len(docs))
print(docs[0])
print(docs[1])
2 {'authors': ['Fernandez, Maria'], 'date_added': '2007-04-27', 'download': 'http://constant.all2all.org/~digitales/texts/fernandez_confNL.02', 'download_status': 404, 'language': 'Nederlands', 'license': 'Copyright', 'md5': '2f92214227eb6b8d8e2a6e4fc37ad6f0', 'projects': ['Cyberfeminist working days', 'Digitales'], 'status': 'Selected text', 'themes': ['(Cyber)feminism', 'Body and technology'], 'title': 'Postkoloniale Mediatheorie', 'type': 'text', 'url': 'https://www.constantvzw.org/verlag/spip.php?page=article&id_article=12&mot_filtre=4&id_lang=0&debut_source_material=0', 'year': '1999'} {'authors': ['Sayyid, Salman'], 'date_added': '2007-10-01', 'download': 'https://www.constantvzw.org/verlag/IMG/doc/Salman_Sayyid.doc', 'download_status': 200, 'language': 'English', 'license': 'Creative Commons Attribution-NoDerivs', 'md5': '747e31a9e4209d22cc30dfda12de65ce', 'projects': ['Stitch and Split'], 'published_in': ['AS 178 Selves and Territories in Science Fiction'], 'status': 'Selected text', 'themes': ['Science (-) Fiction'], 'title': 'Dune, Depolitization and Decolonizing the Future', 'type': 'text', 'url': 'https://www.constantvzw.org/verlag/spip.php?page=article&id_article=46&mot_filtre=4&id_lang=0&debut_source_material=0', 'year': '2005'}