Documentation
¶
Overview ¶
Package textutil provides tools for creating Transformers as used in golang.org/x/text.
Index ¶
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Rewriter ¶
type Rewriter interface {
// Rewrite rewrites an indivisible segment of input. If any error is
// encountered, all reads and writes made within the same call to Rewrite
// will be discarded. Otherwise, the runes written to the output replace
// the runes read from the input.
//
// Rewrite must be called with a State representing non-empty input.
Rewrite(c State)
// Reset implements the Reset method of transform.Transformer.
Reset()
}
A Rewriter rewrites UTF-8 bytes.
Example ¶
package main
import (
"fmt"
"unicode"
"unicode/utf8"
"github.com/mpvl/textutil"
"golang.org/x/text/transform"
)
func main() {
	// A Rewriter value is turned into a Transformer with NewTransformer.
	spaceCleaner := textutil.NewTransformer(&cleanSpaces{})
	fmt.Println(spaceCleaner.String(" Hello world! \t Hello world! ")) // Hello world! Hello world!

	// A plain rewrite function is wrapped with NewTransformerFromFunc.
	escaper := textutil.NewTransformerFromFunc(escape)
	encoded := escaper.String("Héllø wørl∂!") // H\u00E9ll\u00F8 w\u00F8rl\u2202!
	fmt.Println(encoded)

	unescaper := textutil.NewTransformerFromFunc(unescape)
	fmt.Println(unescaper.String(encoded)) // Héllø wørl∂!

	// The resulting Transformers can be combined with transform.Chain.
	pipeline := transform.Chain(escaper, spaceCleaner, unescaper)
	result, _, _ := transform.String(pipeline, "\t\t\tHéllø \t wørl∂! ")
	fmt.Println(result) // Héllø wørl∂!
}
// cleanSpaces is a Rewriter that collapses each run of whitespace into a
// single space and drops whitespace entirely at the start and end of the
// input. It processes one rune per call to Rewrite.
type cleanSpaces struct {
	// notFirst is set once a non-space rune has been written.
	notFirst bool
	// foundSpace is set while whitespace has been read but not yet emitted.
	foundSpace bool
}
// Rewrite consumes one rune from s. Whitespace is only remembered, not
// written; a non-space rune is written, preceded by a single space when
// whitespace was seen since the previous non-space rune.
func (t *cleanSpaces) Rewrite(s textutil.State) {
	r, _ := s.ReadRune()
	if unicode.IsSpace(r) {
		t.foundSpace = true
		return
	}

	// Emit the pending separator, except before the very first rune.
	if t.foundSpace && t.notFirst {
		if !s.WriteRune(' ') {
			// Leave the state untouched when the space cannot be written.
			return
		}
	}

	t.foundSpace = false
	t.notFirst = true
	s.WriteRune(r)
}
// Reset returns t to its zero value, clearing both tracking flags.
func (t *cleanSpaces) Reset() {
	var zero cleanSpaces
	*t = zero
}
// escape rewrites input by escaping all non-ASCII runes and the escape
// character itself. Each call reads one rune from s and writes:
//   - \U plus eight uppercase hex digits for runes above U+FFFF,
//   - \u plus four uppercase hex digits for any other non-ASCII rune,
//   - a doubled backslash for the backslash itself,
//   - the rune unchanged otherwise.
func escape(s textutil.State) {
	switch r, _ := s.ReadRune(); {
	case r > 0xffff:
		// Only runes that do not fit in four hex digits need the long form;
		// U+FFFF itself is still representable as \uFFFF.
		fmt.Fprintf(s, `\U%08X`, r)
	case r >= utf8.RuneSelf:
		fmt.Fprintf(s, `\u%04X`, r)
	case r == '\\':
		s.WriteString(`\\`)
	default:
		s.WriteRune(r)
	}
}
// unescape reverses escape. A rune other than a backslash is copied as is.
// After a backslash it accepts a second backslash, 'u' followed by four hex
// digits, or 'U' followed by eight hex digits; only the digits 0-9 and A-F
// are recognized. Any other sequence is replaced by utf8.RuneError.
func unescape(s textutil.State) {
	first, _ := s.ReadRune()
	if first != '\\' {
		s.WriteRune(first)
		return
	}

	// Work out how many hex digits the escape carries, handling the
	// non-numeric cases right away.
	var digits int
	switch kind, _ := s.ReadRune(); kind {
	case '\\':
		s.WriteRune('\\')
		return
	case 'u':
		digits = 4
	case 'U':
		digits = 8
	default:
		s.WriteRune(utf8.RuneError)
		return
	}

	var decoded rune
	for ; digits > 0; digits-- {
		var nibble rune
		switch d, _ := s.ReadRune(); {
		case d >= '0' && d <= '9':
			nibble = d - '0'
		case d >= 'A' && d <= 'F':
			nibble = d - 'A' + 10
		default:
			// Hand the offending rune back for a later call to Rewrite and
			// flag the truncated escape.
			s.UnreadRune()
			s.WriteRune(utf8.RuneError)
			return
		}
		decoded = decoded<<4 | nibble
	}
	s.WriteRune(decoded)
}
Output: Hello world! Hello world! H\u00E9ll\u00F8 w\u00F8rl\u2202! Héllø wørl∂! Héllø wørl∂!
type State ¶
type State interface {
// ReadRune returns the next rune from the source and the number of bytes
// consumed. It returns (RuneError, 1) for invalid UTF-8 bytes. If the
// source buffer is empty, it will return (RuneError, 0).
ReadRune() (r rune, size int)
// UnreadRune unreads the most recently read rune and makes it available for
// the next call to Rewrite. Only one call to UnreadRune is allowed per
// Rewrite.
UnreadRune()
// WriteBytes writes the given byte slice to the destination and reports
// whether the write was successful.
WriteBytes(b []byte) bool
// WriteString writes the given string to the destination and reports
// whether the write was successful.
WriteString(s string) bool
// WriteRune writes the given rune to the destination and reports whether
// the write was successful.
WriteRune(r rune) bool
// Write implements io.Writer. The user is advised to use WriteBytes when
// conformance to io.Writer is not needed.
Write(b []byte) (n int, err error)
// SetError reports invalid source bytes.
SetError(err error)
}
State tracks the transformation of a minimal chunk of input. Reads and writes on a State will either be committed in full or not at all.
type Transformer ¶
type Transformer struct {
// The SpanningTransformer is embedded, so its methods are promoted to
// Transformer.
transform.SpanningTransformer
}
A Transformer wraps a transform.SpanningTransformer providing convenience methods for most of the functionality in the transform package.
func NewTransformer ¶
func NewTransformer(r Rewriter) Transformer
NewTransformer returns a Transformer that uses the given Rewriter to transform input by repeatedly calling Rewrite until all input has been processed or an error is encountered.
func NewTransformerFromFunc ¶
func NewTransformerFromFunc(rewrite func(State)) Transformer
NewTransformerFromFunc calls NewTransformer with a stateless Rewriter created from rewrite, which must follow the same guidelines as the Rewrite method of a Rewriter.
func (Transformer) Bytes ¶
func (t Transformer) Bytes(b []byte) []byte
Bytes returns a new byte slice with the result of converting b using t. It calls Reset on t. It returns nil if any error was found.
func (Transformer) Reset ¶
func (t Transformer) Reset()
Reset calls the Reset method of the underlying Transformer.
func (Transformer) Span ¶
func (t Transformer) Span(b []byte, atEOF bool) (n int, err error)
Span calls the Span method of the underlying Transformer.
func (Transformer) String ¶
func (t Transformer) String(s string) string
String applies t to s and returns the result. This method wraps transform.String. It returns the empty string if any error occurred.