Decode a string encoded in Punycode
# File lib/domain_name/punycode.rb, line 198
#
# Decode a string encoded in Punycode.
#
# string - the Punycode text (decode_hostname passes the label with its
#          "xn--" prefix already removed).
#
# Returns the decoded text, built from code points with Array#pack('U*').
#
# Raises ArgumentError if the part before the last delimiter contains a
# character matching RE_NONBASIC, if the part after it contains a character
# that decode_digit does not recognise, or if the input ends in the middle
# of a variable-length integer.
# Raises BufferOverflowError if an intermediate value exceeds MAXINT.
def decode(string)
  # Initialize the state
  n = INITIAL_N
  i = 0
  bias = INITIAL_BIAS

  if (j = string.rindex(DELIMITER))
    b = string[0...j]

    if b.match(RE_NONBASIC)
      raise ArgumentError, "Illegal character is found in basic part: #{string.inspect}"
    end

    # Handle the basic code points
    output = b.unpack('U*')

    # Everything after the last delimiter, through the END of the string.
    # (The range must end at -1; ending it at 1 always yields "" and the
    # encoded non-basic code points would be dropped.)
    u = string[(j + 1)..-1]
  else
    output = []
    u = string
  end

  # Main decoding loop: Start just after the last delimiter if any
  # basic code points were copied; start at the beginning
  # otherwise.

  input = u.unpack('C*')
  input_length = input.length
  h = 0
  out = output.length

  while h < input_length
    # Decode a generalized variable-length integer into delta,
    # which gets added to i.  The overflow checking is easier
    # if we increase i as we go, then subtract off its starting
    # value at the end to obtain delta.

    oldi = i
    w = 1
    k = BASE

    loop do
      digit = decode_digit(input[h])
      unless digit
        raise ArgumentError, "Illegal character is found in non-basic part: #{string.inspect}"
      end

      h += 1
      i += digit * w
      raise BufferOverflowError if i > MAXINT

      # Threshold for this digit position
      t = if k <= bias
            TMIN
          elsif k - bias >= TMAX
            TMAX
          else
            k - bias
          end
      break if digit < t

      w *= BASE - t
      raise BufferOverflowError if w > MAXINT

      k += BASE
      # A non-terminal digit must be followed by another digit
      h < input_length or raise ArgumentError, "Malformed input given: #{string.inspect}"
    end

    # Adapt the bias
    delta = oldi == 0 ? i / DAMP : (i - oldi) >> 1
    delta += delta / (out + 1)
    bias = 0
    while delta > CUTOFF
      delta /= LOBASE
      bias += BASE
    end
    bias += (LOBASE + 1) * delta / (delta + SKEW)

    # i was supposed to wrap around from out+1 to 0, incrementing
    # n each time, so we'll fix that now:

    q, i = i.divmod(out + 1)
    n += q
    raise BufferOverflowError if n > MAXINT

    # Insert n at position i of the output:

    output[i, 0] = n

    out += 1
    i += 1
  end

  output.pack('U*')
end
Decode a hostname using IDN/Punycode algorithms
# File lib/domain_name/punycode.rb, line 280
#
# Decode a hostname using IDN/Punycode algorithms.
#
# Every dot-separated label that starts with the ACE prefix "xn--" is
# replaced by the result of decode on the rest of the label; all other
# labels are left untouched.  The prefix is matched case-insensitively
# ("XN--", "Xn--", ...), since the ACE prefix is defined as "xn--" or any
# capitalization thereof.
#
# hostname - the hostname String to decode.
#
# Returns a new String; errors raised by decode propagate to the caller.
def decode_hostname(hostname)
  hostname.gsub(/(\A|\.)xn--([^.]*)/i) do
    # Capture both groups before calling decode.
    separator = Regexp.last_match(1)
    label = Regexp.last_match(2)
    separator + decode(label)
  end
end
Encode a string in Punycode
# File lib/domain_name/punycode.rb, line 105
#
# Encode a string in Punycode.
#
# string - the text to encode; it is split into code points with
#          String#unpack('U*').
#
# Returns a new String holding the basic (< 0x80) code points, then
# DELIMITER if there were any, then the encoded non-basic code points.
#
# Raises BufferOverflowError if delta exceeds MAXINT.
def encode(string)
  input = string.unpack('U*')
  # Start from an unfrozen copy: the buffer is appended to below, which
  # would raise on a frozen string literal.
  output = ''.dup

  # Initialize the state
  n = INITIAL_N
  delta = 0
  bias = INITIAL_BIAS

  # Handle the basic code points
  input.each { |cp| output << cp.chr if cp < 0x80 }

  # h is the number of code points that have been handled, b is the
  # number of basic code points.
  h = b = output.length

  output << DELIMITER if b > 0

  # Main encoding loop

  while h < input.length
    # All non-basic code points < n have been handled already.  Find
    # the next larger one

    m = MAXINT
    input.each do |cp|
      m = cp if cp >= n && cp < m
    end

    # Increase delta enough to advance the decoder's <n,i> state to
    # <m,0>, but guard against overflow

    delta += (m - n) * (h + 1)
    raise BufferOverflowError if delta > MAXINT

    n = m

    input.each do |cp|
      if cp < n
        delta += 1
        raise BufferOverflowError if delta > MAXINT
      elsif cp == n
        # Represent delta as a generalized variable-length integer
        q = delta
        k = BASE
        loop do
          t = if k <= bias
                TMIN
              elsif k - bias >= TMAX
                TMAX
              else
                k - bias
              end
          break if q < t

          q, r = (q - t).divmod(BASE - t)
          output << encode_digit(t + r, false)
          k += BASE
        end

        output << encode_digit(q, false)

        # Adapt the bias
        delta = h == b ? delta / DAMP : delta >> 1
        delta += delta / (h + 1)
        bias = 0
        while delta > CUTOFF
          delta /= LOBASE
          bias += BASE
        end
        bias += (LOBASE + 1) * delta / (delta + SKEW)

        delta = 0
        h += 1
      end
    end

    delta += 1
    n += 1
  end

  output
end
Encode a hostname using IDN/Punycode algorithms
# File lib/domain_name/punycode.rb, line 185
#
# Encode a hostname using IDN/Punycode algorithms.
#
# A hostname with no character matching RE_NONBASIC is returned as is (the
# same object).  Otherwise each dot-separated label that contains such a
# character is replaced by "xn--" followed by encode(label); other labels
# are kept unchanged.
#
# The hostname is split with a negative limit so that empty trailing labels
# survive: a fully-qualified name such as "name.example." keeps its final
# dot, just as it does on the all-ASCII early-return path.
#
# hostname - the hostname String to encode.
#
# Returns the encoded hostname String.
def encode_hostname(hostname)
  return hostname unless hostname.match(RE_NONBASIC)

  labels = hostname.split('.', -1).map do |name|
    name.match(RE_NONBASIC) ? "xn--#{encode(name)}" : name
  end
  labels.join('.')
end
Returns the numeric value of a basic code point (for use in representing integers) in the range 0 to BASE-1, or nil if cp does not represent a value.
# File lib/domain_name/punycode.rb, line 98
#
# Look up the integer value that the basic code point +cp+ stands for.
#
# cp - the code point to look up, used directly as the key/index into the
#      DECODE_DIGIT table.
#
# Returns whatever DECODE_DIGIT holds for +cp+; decode treats a false or nil
# result as an illegal character.
def decode_digit(cp)
  DECODE_DIGIT[cp]
end
Returns the basic code point whose value (when used for representing integers) is d, which must be in the range 0 to BASE-1. The lowercase form is used unless flag is true, in which case the uppercase form is used. The behavior is undefined if flag is true and digit d has no uppercase form.
# File lib/domain_name/punycode.rb, line 89
#
# Return the one-character String for the digit value +d+.
#
#   0..25  map to ASCII a..z, or A..Z when +flag+ is true
#   26..35 map to ASCII 0..9
#
# d    - the digit value, expected to be in 0..35.
# flag - when true, request the uppercase form.  Values 26..35 have no
#        uppercase form, so for them the flag is ignored instead of being
#        subtracted from the character code (which produced control
#        characters such as "\x10").
#
# Returns a one-character String (Integer#chr).
def encode_digit(d, flag)
  # '0' is code 48 = 26 + 22
  return (d + 22).chr if d >= 26

  # 'A' is code 65, 'a' is code 97
  ((flag ? 65 : 97) + d).chr
end
Disabled; run with --debug to generate this.
Generated with the Darkfish Rdoc Generator 1.1.6.