Files

DomainName::Punycode

Constants

BASE
TMIN
TMAX
SKEW
DAMP
INITIAL_BIAS
INITIAL_N
DELIMITER
MAXINT
LOBASE
CUTOFF
RE_NONBASIC
DECODE_DIGIT

Public Class Methods

decode(string) click to toggle source

Decode a string encoded in Punycode

     # File lib/domain_name/punycode.rb, line 198
# Decode a string encoded in Punycode.
#
# string - the encoded text: an optional run of basic code points that
#          ends at the last DELIMITER, followed by the digits that encode
#          the remaining code points.
#
# Returns the decoded String, built from the code points with pack('U*').
#
# Raises ArgumentError when the part before the last DELIMITER matches
# RE_NONBASIC, when a character after it is not a valid digit, or when
# the input ends in the middle of a variable-length integer.
# Raises BufferOverflowError when i, w or n grows past MAXINT.
def decode(string)
  # Initialize the state
  n = INITIAL_N
  i = 0
  bias = INITIAL_BIAS

  j = string.rindex(DELIMITER)
  if j
    b = string[0...j]

    if b.match(RE_NONBASIC)
      raise ArgumentError, "Illegal character is found in basic part: #{string.inspect}"
    end

    # Handle the basic code points: they are copied to the output as is
    output = b.unpack('U*')

    # The encoded part is everything after the last delimiter, up to the
    # end of the string (hence -1, not 1, as the end of the range).
    u = string[(j + 1)..-1]
  else
    output = []
    u = string
  end

  # Main decoding loop: Start just after the last delimiter if any
  # basic code points were copied; start at the beginning
  # otherwise.

  input = u.unpack('C*')
  input_length = input.length
  h = 0
  out = output.length

  while h < input_length
    # Decode a generalized variable-length integer into delta,
    # which gets added to i.  The overflow checking is easier
    # if we increase i as we go, then subtract off its starting
    # value at the end to obtain delta.

    oldi = i
    w = 1
    k = BASE

    loop do
      digit = decode_digit(input[h])
      unless digit
        raise ArgumentError, "Illegal character is found in non-basic part: #{string.inspect}"
      end
      h += 1
      i += digit * w
      raise BufferOverflowError if i > MAXINT
      t = k <= bias ? TMIN : k - bias >= TMAX ? TMAX : k - bias
      break if digit < t
      w *= BASE - t
      raise BufferOverflowError if w > MAXINT
      k += BASE
      # A digit >= t promises another digit; running out of input here
      # means the integer was cut short.
      unless h < input_length
        raise ArgumentError, "Malformed input given: #{string.inspect}"
      end
    end

    # Adapt the bias
    delta = oldi == 0 ? i / DAMP : (i - oldi) >> 1
    delta += delta / (out + 1)
    bias = 0
    while delta > CUTOFF
      delta /= LOBASE
      bias += BASE
    end
    bias += (LOBASE + 1) * delta / (delta + SKEW)

    # i was supposed to wrap around from out+1 to 0, incrementing
    # n each time, so we'll fix that now:

    q, i = i.divmod(out + 1)
    n += q
    raise BufferOverflowError if n > MAXINT

    # Insert n at position i of the output:

    output[i, 0] = n

    out += 1
    i += 1
  end

  output.pack('U*')
end
decode_hostname(hostname) click to toggle source

Decode a hostname using IDN/Punycode algorithms

     # File lib/domain_name/punycode.rb, line 280
# Decode a hostname using IDN/Punycode algorithms.
#
# Every label (a run of non-dot characters at the start of the string or
# right after a dot) that begins with the ACE prefix "xn--" is replaced by
# the result of decode on the rest of the label.  The prefix is matched
# case-insensitively, so "XN--" and "Xn--" labels are decoded as well.
# Labels without the prefix are left untouched.
#
# hostname - the hostname String to decode.
#
# Returns a new String; hostname itself is not modified.
# Any error raised by decode propagates to the caller.
def decode_hostname(hostname)
  hostname.gsub(/(\A|\.)xn--([^.]*)/i) {
    # Keep the leading dot (or nothing) and swap in the decoded label.
    Regexp.last_match(1) + decode(Regexp.last_match(2))
  }
end
encode(string) click to toggle source

Encode a string in Punycode

     # File lib/domain_name/punycode.rb, line 105
# Encode a string in Punycode.
#
# string - the text to encode; it is read as code points via unpack('U*').
#
# Returns a String holding the basic (< 0x80) code points of the input in
# their original order, then DELIMITER if at least one basic code point
# was copied, then the digits that encode the remaining code points.
#
# Raises BufferOverflowError when delta grows past MAXINT.
def encode(string)
  code_points = string.unpack('U*')
  total = code_points.length

  # Copy the basic code points straight through
  output = ''
  code_points.each do |cp|
    output << cp.chr if cp < 0x80
  end

  # basic_count is the number of basic code points; handled is the
  # number of code points taken care of so far.
  basic_count = output.length
  handled = basic_count
  output << DELIMITER unless basic_count <= 0

  # Encoder state
  n = INITIAL_N
  delta = 0
  bias = INITIAL_BIAS

  while handled < total
    # Every non-basic code point below n is done; pick the smallest
    # code point that is >= n (and below MAXINT) as the next target.
    m = MAXINT
    code_points.each do |cp|
      m = cp if cp >= n && cp < m
    end

    # Move the decoder's <n,i> state forward to <m,0>, watching for
    # overflow
    delta += (m - n) * (handled + 1)
    raise BufferOverflowError if delta > MAXINT
    n = m

    code_points.each do |cp|
      if cp < n
        # Already emitted: it only costs one position
        delta += 1
        raise BufferOverflowError if delta > MAXINT
        next
      end
      next unless cp == n

      # Write delta as a generalized variable-length integer
      q = delta
      k = BASE
      loop do
        gap = k - bias
        t = if gap <= 0
              TMIN
            elsif gap >= TMAX
              TMAX
            else
              gap
            end
        break if q < t
        q, r = (q - t).divmod(BASE - t)
        output << encode_digit(t + r, false)
        k += BASE
      end

      output << encode_digit(q, false)

      # Adapt the bias; the very first delta is damped harder
      if handled == basic_count
        delta /= DAMP
      else
        delta >>= 1
      end
      delta += delta / (handled + 1)
      bias = 0
      until delta <= CUTOFF
        delta /= LOBASE
        bias += BASE
      end
      bias += (LOBASE + 1) * delta / (delta + SKEW)

      delta = 0
      handled += 1
    end

    delta += 1
    n += 1
  end

  output
end
encode_hostname(hostname) click to toggle source

Encode a hostname using IDN/Punycode algorithms

     # File lib/domain_name/punycode.rb, line 185
# Encode a hostname using IDN/Punycode algorithms.
#
# A hostname with no character matching RE_NONBASIC is returned as is.
# Otherwise the hostname is split on dots; each label that contains such
# a character is replaced by "xn--" followed by encode(label), and the
# labels are joined with dots again.
#
# hostname - the hostname String to encode.
#
# Returns the original String when nothing needs encoding, otherwise a
# new String.  Empty labels, including the one after a trailing root dot,
# are preserved.
# Any error raised by encode propagates to the caller.
def encode_hostname(hostname)
  return hostname unless hostname.match(RE_NONBASIC)

  # A negative limit keeps trailing empty fields, so "name.example."
  # does not lose its final dot.
  labels = hostname.split('.', -1)

  labels.map { |name|
    if name.match(RE_NONBASIC)
      'xn--' + encode(name)
    else
      name
    end
  }.join('.')
end

Private Class Methods

decode_digit(cp) click to toggle source

Returns the numeric value of a basic code point (for use in representing integers) in the range 0 to BASE-1, or nil if cp does not represent a value.

     # File lib/domain_name/punycode.rb, line 98
 # Look up the numeric value of a basic code point.
 #
 # cp - the code point to look up, used as the index into DECODE_DIGIT.
 #
 # Returns whatever DECODE_DIGIT holds for cp; callers treat a nil result
 # as "cp is not a digit".
 def decode_digit(cp)
   digit_value = DECODE_DIGIT[cp]
   digit_value
 end
encode_digit(d, flag) click to toggle source

Returns the basic code point whose value (when used for representing integers) is d, which must be in the range 0 to BASE-1. The lowercase form is used unless flag is true, in which case the uppercase form is used. The behavior is undefined if flag is true and digit d has no uppercase form.

    # File lib/domain_name/punycode.rb, line 89
# Convert a digit value into its one-character String.
#
#  0..25 map to ASCII a..z (A..Z when flag is truthy)
# 26..35 map to ASCII 0..9
#
# d    - the digit value.
# flag - when truthy, 32 is subtracted from the character code, which
#        turns a lowercase ASCII letter into its uppercase form.
#
# Returns a one-character String (Integer#chr of the computed code).
def encode_digit(d, flag)
  # 'a' is 97 and '0' is 48, i.e. 26 + 22
  code = d < 26 ? d + 97 : d + 22
  code -= 32 if flag
  code.chr
end

Disabled; run with --debug to generate this.

[Validate]

Generated with the Darkfish Rdoc Generator 1.1.6.