// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package strs provides string manipulation functionality specific to protobuf.
|
6 |
package strs |
|
7 |
|
|
8 |
import ( |
|
9 |
"go/token" |
|
10 |
"strings" |
|
11 |
"unicode" |
|
12 |
"unicode/utf8" |
|
13 |
|
|
14 |
"google.golang.org/protobuf/internal/flags" |
|
15 |
"google.golang.org/protobuf/reflect/protoreflect" |
|
16 |
) |
|
17 |
|
|
18 |
// EnforceUTF8 reports whether to enforce strict UTF-8 validation. |
|
19 |
func EnforceUTF8(fd protoreflect.FieldDescriptor) bool { |
|
20 |
if flags.ProtoLegacy { |
|
21 |
if fd, ok := fd.(interface{ EnforceUTF8() bool }); ok { |
|
22 |
return fd.EnforceUTF8() |
|
23 |
} |
|
24 |
} |
|
25 |
return fd.Syntax() == protoreflect.Proto3 |
|
26 |
} |
|
27 |
|
|
28 |
// GoCamelCase camel-cases a protobuf name for use as a Go identifier. |
|
29 |
// |
|
30 |
// If there is an interior underscore followed by a lower case letter, |
|
31 |
// drop the underscore and convert the letter to upper case. |
|
32 |
func GoCamelCase(s string) string { |
|
33 |
// Invariant: if the next letter is lower case, it must be converted |
|
34 |
// to upper case. |
|
35 |
// That is, we process a word at a time, where words are marked by _ or |
|
36 |
// upper case letter. Digits are treated as words. |
|
37 |
var b []byte |
|
38 |
for i := 0; i < len(s); i++ { |
|
39 |
c := s[i] |
|
40 |
switch { |
|
41 |
case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]): |
|
42 |
// Skip over '.' in ".{{lowercase}}". |
|
43 |
case c == '.': |
|
44 |
b = append(b, '_') // convert '.' to '_' |
|
45 |
case c == '_' && (i == 0 || s[i-1] == '.'): |
|
46 |
// Convert initial '_' to ensure we start with a capital letter. |
|
47 |
// Do the same for '_' after '.' to match historic behavior. |
|
48 |
b = append(b, 'X') // convert '_' to 'X' |
|
49 |
case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]): |
|
50 |
// Skip over '_' in "_{{lowercase}}". |
|
51 |
case isASCIIDigit(c): |
|
52 |
b = append(b, c) |
|
53 |
default: |
|
54 |
// Assume we have a letter now - if not, it's a bogus identifier. |
|
55 |
// The next word is a sequence of characters that must start upper case. |
|
56 |
if isASCIILower(c) { |
|
57 |
c -= 'a' - 'A' // convert lowercase to uppercase |
|
58 |
} |
|
59 |
b = append(b, c) |
|
60 |
|
|
61 |
// Accept lower case sequence that follows. |
|
62 |
for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ { |
|
63 |
b = append(b, s[i+1]) |
|
64 |
} |
|
65 |
} |
|
66 |
} |
|
67 |
return string(b) |
|
68 |
} |
|
69 |
|
|
70 |
// GoSanitized converts a string to a valid Go identifier.
//
// Every rune that is neither a Unicode letter nor a Unicode decimal digit
// (as reported by unicode.IsLetter and unicode.IsDigit) is replaced by '_'.
// The result is then prefixed with '_' if it is a Go keyword or does not
// begin with a letter; this includes the empty string, which becomes "_".
func GoSanitized(s string) string {
	sanitize := func(r rune) rune {
		switch {
		case unicode.IsLetter(r), unicode.IsDigit(r):
			return r
		default:
			return '_'
		}
	}
	ident := strings.Map(sanitize, s)

	// An identifier must start with a letter and must not collide with a
	// keyword; a leading '_' resolves both problems.
	first, _ := utf8.DecodeRuneInString(ident)
	needsPrefix := !unicode.IsLetter(first) || token.Lookup(ident).IsKeyword()
	if needsPrefix {
		return "_" + ident
	}
	return ident
}
|
89 |
|
|
90 |
// JSONCamelCase converts a snake_case identifier to a camelCase identifier, |
|
91 |
// according to the protobuf JSON specification. |
|
92 |
func JSONCamelCase(s string) string { |
|
93 |
var b []byte |
|
94 |
var wasUnderscore bool |
|
95 |
for i := 0; i < len(s); i++ { // proto identifiers are always ASCII |
|
96 |
c := s[i] |
|
97 |
if c != '_' { |
|
98 |
if wasUnderscore && isASCIILower(c) { |
|
99 |
c -= 'a' - 'A' // convert to uppercase |
|
100 |
} |
|
101 |
b = append(b, c) |
|
102 |
} |
|
103 |
wasUnderscore = c == '_' |
|
104 |
} |
|
105 |
return string(b) |
|
106 |
} |
|
107 |
|
|
108 |
// JSONSnakeCase converts a camelCase identifier to a snake_case identifier, |
|
109 |
// according to the protobuf JSON specification. |
|
110 |
func JSONSnakeCase(s string) string { |
|
111 |
var b []byte |
|
112 |
for i := 0; i < len(s); i++ { // proto identifiers are always ASCII |
|
113 |
c := s[i] |
|
114 |
if isASCIIUpper(c) { |
|
115 |
b = append(b, '_') |
|
116 |
c += 'a' - 'A' // convert to lowercase |
|
117 |
} |
|
118 |
b = append(b, c) |
|
119 |
} |
|
120 |
return string(b) |
|
121 |
} |
|
122 |
|
|
123 |
// MapEntryName derives the name of the map entry message given the field name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
//
// Underscores are dropped, the first byte and every byte following an
// underscore are upper-cased if they are ASCII lowercase letters, and the
// suffix "Entry" is appended. An empty field name yields "Entry".
//
// The input is processed byte by byte with ASCII-only case mapping, so any
// non-ASCII bytes pass through untouched instead of being truncated into
// invalid UTF-8.
func MapEntryName(s string) string {
	b := make([]byte, 0, len(s)+len("Entry"))
	upperNext := true
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == '_':
			upperNext = true
		case upperNext:
			if 'a' <= c && c <= 'z' {
				c -= 'a' - 'A' // convert to uppercase
			}
			b = append(b, c)
			upperNext = false
		default:
			b = append(b, c)
		}
	}
	b = append(b, "Entry"...)
	return string(b)
}
|
142 |
|
|
143 |
// EnumValueName derives the camel-cased enum value name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
//
// Underscores are dropped. The first byte and every byte following an
// underscore are upper-cased; all remaining bytes are lower-cased.
//
// The input is processed byte by byte with ASCII-only case mapping, so any
// non-ASCII bytes pass through untouched instead of being truncated into
// invalid UTF-8.
func EnumValueName(s string) string {
	b := make([]byte, 0, len(s))
	upperNext := true
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == '_':
			upperNext = true
			continue // underscores are never emitted
		case upperNext && 'a' <= c && c <= 'z':
			c -= 'a' - 'A' // convert to uppercase
		case !upperNext && 'A' <= c && c <= 'Z':
			c += 'a' - 'A' // convert to lowercase
		}
		b = append(b, c)
		upperNext = false
	}
	return string(b)
}
|
162 |
|
|
163 |
// TrimEnumPrefix trims the enum name prefix from an enum value name,
// where the prefix is all lowercase without underscores.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
//
// Matching ignores underscores in s and compares the lower-cased bytes of s
// against prefix. If the whole prefix matches, the remainder of s with any
// leading underscores removed is returned. If the prefix does not match, or
// nothing would remain after trimming, s is returned unchanged.
func TrimEnumPrefix(s, prefix string) string {
	rest, want := s, prefix
	for rest != "" && want != "" {
		head := rest[0]
		rest = rest[1:]
		if head == '_' {
			continue // underscores in the value name do not take part in matching
		}
		if rune(want[0]) != unicode.ToLower(rune(head)) {
			return s // no prefix match
		}
		want = want[1:]
	}
	if want != "" {
		return s // value name ran out before the prefix did
	}
	if trimmed := strings.TrimLeft(rest, "_"); trimmed != "" {
		return trimmed
	}
	return s // avoid returning empty string
}
|
187 |
|
|
188 |
// isASCIILower reports whether c is an ASCII lowercase letter ('a' through 'z').
func isASCIILower(c byte) bool {
	return c >= 'a' && c <= 'z'
}
|
191 |
// isASCIIUpper reports whether c is an ASCII uppercase letter ('A' through 'Z').
func isASCIIUpper(c byte) bool {
	return c >= 'A' && c <= 'Z'
}
|
194 |
// isASCIIDigit reports whether c is an ASCII decimal digit ('0' through '9').
func isASCIIDigit(c byte) bool {
	return c >= '0' && c <= '9'
}