How can I control what scalar form PyYAML uses for my data?
Falling in love with @lbt's approach, I got this code:
import yaml


def str_presenter(dumper, data):
    """Represent a ``str`` as YAML, using block-literal style when multiline.

    Args:
        dumper: The PyYAML dumper/representer performing the dump.
        data: The string being represented.

    Returns:
        The scalar node produced by ``dumper.represent_scalar``.
    """
    # splitlines() drops a single trailing line break, so "abc\n" counts as
    # one line and keeps the dumper's default style.
    if len(data.splitlines()) > 1:  # check for multiline string
        return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')
    return dumper.represent_scalar('tag:yaml.org,2002:str', data)


yaml.add_representer(str, str_presenter)

# to use with safe_dump:
yaml.representer.SafeRepresenter.add_representer(str, str_presenter)
It makes every multiline string be a block literal.
I was trying to avoid the monkey-patching part. Full credit to @lbt and @J.F.Sebastian.
import yaml
from collections import OrderedDict


class quoted(str):
    """Marker type: strings of this class are dumped double-quoted."""


def quoted_presenter(dumper, data):
    """Represent a ``quoted`` string as a double-quoted YAML scalar."""
    return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='"')


yaml.add_representer(quoted, quoted_presenter)


class literal(str):
    """Marker type: strings of this class are dumped as block literals."""


def literal_presenter(dumper, data):
    """Represent a ``literal`` string as a ``|`` block-literal YAML scalar."""
    return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')


yaml.add_representer(literal, literal_presenter)


def ordered_dict_presenter(dumper, data):
    """Represent an ``OrderedDict`` as a plain YAML mapping.

    Passing ``data.items()`` (rather than the dict itself) hands the
    representer the pairs in the OrderedDict's own iteration order.
    """
    return dumper.represent_dict(data.items())


yaml.add_representer(OrderedDict, ordered_dict_presenter)

# Demo: the wrapper type chosen per value selects its scalar style, so no
# global representer for ``str`` has to be replaced.
d = OrderedDict(short=quoted("Hello"), long=literal("Line1\nLine2\nLine3\n"))
print(yaml.dump(d))
Output
short: "Hello"
long: |
  Line1
  Line2
  Line3
I wanted any input with a `\n` in it to be a block literal. Using the code in `yaml/representer.py` as a base, I got:
# -*- coding: utf-8 -*-
import yaml

# Characters whose presence triggers block style:
# LF, CR, FS, GS, RS, NEL, LINE SEPARATOR and PARAGRAPH SEPARATOR.
_BLOCK_TRIGGER_CHARS = u"\u000a\u000d\u001c\u001d\u001e\u0085\u2028\u2029"


def should_use_block(value):
    """Return True if *value* contains any of the block-triggering characters."""
    return any(c in value for c in _BLOCK_TRIGGER_CHARS)


def my_represent_scalar(self, tag, value, style=None):
    """Build a scalar node, defaulting multi-line values to ``|`` block style.

    Intended to be assigned over ``BaseRepresenter.represent_scalar``.

    Args:
        self: The representer instance the method is bound to.
        tag: YAML tag for the scalar.
        value: The scalar's text.
        style: Explicit scalar style; when given it is used unchanged.

    Returns:
        The ``ScalarNode`` that was created.
    """
    if style is None:
        # Only pick a style when the caller did not request one explicitly.
        style = '|' if should_use_block(value) else self.default_style

    node = yaml.representer.ScalarNode(tag, value, style=style)
    # Record the node under the current alias key, if one is set.
    if self.alias_key is not None:
        self.represented_objects[self.alias_key] = node
    return node


# Sample data: the triple-quoted values span several lines, so each one
# contains embedded newlines (including a trailing one).
a = {
    'short': "Hello",
    'multiline': """Line1
Line2
Line3
""",
    'multiline-unicode': u"""Lêne1
Lêne2
Lêne3
""",
}

# Dump with the stock representer ...
print(yaml.dump(a))
print(yaml.dump(a, allow_unicode=True))

# ... then monkey-patch the base representer and dump again.
yaml.representer.BaseRepresenter.represent_scalar = my_represent_scalar
print(yaml.dump(a))
print(yaml.dump(a, allow_unicode=True))
Output
{multiline: 'Line1 Line2 Line3 ', multiline-unicode: "L\xEAne1\nL\xEAne2\nL\xEAne3\n", short: Hello}
{multiline: 'Line1 Line2 Line3 ', multiline-unicode: 'Lêne1 Lêne2 Lêne3 ', short: Hello}
After override
multiline: |
  Line1
  Line2
  Line3
multiline-unicode: "L\xEAne1\nL\xEAne2\nL\xEAne3\n"
short: Hello
multiline: |
  Line1
  Line2
  Line3
multiline-unicode: |
  Lêne1
  Lêne2
  Lêne3
short: Hello