5

I've got a string with the following format:

City, State ZIP

I'd like to get City and State from this string.

How can I do that with JavaScript? edit: note that he doesn't mention he already has the zip code when he gets here, if that helps you in your solution ~~ drachenstern

jcolebrand
  • 15,923
  • 10
  • 71
  • 117
Moshe
  • 55,729
  • 73
  • 263
  • 420
  • 4
    /([^,]+),\s*(\w{2})\s*(\d{5}(?:-\d{4})?)/ – generalhenry Feb 23 '11 at 22:18
  • http://www.canadapost.ca/cpo/mc/personal/guides/addressing.jsf http://www.canadapost.ca/tools/pg/manual/PGaddress-e.asp http://stackoverflow.com/questions/16413/parse-usable-street-address-city-state-zip-from-a-string – jcolebrand Feb 24 '11 at 16:31

5 Answers5

11
var address = "San Francisco, CA 94129";

/**
 * Split a "City, State ZIP" string into its parts.
 *
 * Everything before the first comma is the city. In the remainder, the last
 * whitespace-separated token is the ZIP and whatever precedes it is the
 * state, so multi-word cities and states are kept whole.
 *
 * @param {string} address - Address in "City, State ZIP" form.
 * @returns {{city: string, state: string, zip: string}} The parsed parts.
 *     `city` is "" when the input has no comma; `zip` is "" when only a
 *     single token follows the city.
 * @throws {string} "Address is not a string." when `address` is not a string.
 *     A plain string (not an Error) is thrown so existing callers that
 *     compare the thrown value keep working.
 */
function parseAddress(address) {
    // Make sure the address is a string.
    if (typeof address !== "string") throw "Address is not a string.";

    const trimmed = address.trim();

    // The first comma separates the city from "State ZIP".
    const comma = trimmed.indexOf(',');
    const hasComma = comma !== -1;

    // Without a comma there is no city; treat the whole input as "State ZIP".
    const city = hasComma ? trimmed.slice(0, comma).trim() : '';

    // Trim instead of skipping a fixed two characters, so "City,ST" and
    // "City,   ST" parse the same way as "City, ST".
    const rest = (hasComma ? trimmed.slice(comma + 1) : trimmed).trim();

    // The last token is the ZIP only when something precedes it; a lone token
    // (e.g. "MA") is the state.
    const tokens = rest.split(/\s+/);
    const zip = tokens.length > 1 ? tokens.pop() : '';
    const state = tokens.join(' ');

    return { city: city, state: state, zip: zip };
}

address = parseAddress(address);

This is probably better than using regular expressions and String.split(), as it takes into account that the state and city may have spaces.

EDIT: Bug fix: It only included the first word of multi-word state names.

And here's a minified version. :D

// Minified parseAddress(a): returns {city, state, zip}. city is "" when there is no comma; zip is "" when only one token follows the city. Any amount of whitespace after the comma is accepted. Throws the string "Address is not a string." for non-string input.
function parseAddress(a){if(typeof a!=="string")throw "Address is not a string.";a=a.trim();var c=a.indexOf(","),p=(c<0?a:a.slice(c+1)).trim().split(/\s+/),z=p.length>1?p.pop():"";return{city:c<0?"":a.slice(0,c).trim(),state:p.join(" "),zip:z};}
McKayla
  • 6,467
  • 4
  • 29
  • 45
2

There are many ways to do this. Here's a very naive one:

// Naive approach: tokenise on whitespace and assume exactly three tokens
// ("City," "State" "ZIP"), i.e. a one-word city and a one-word state.
var parts = "City, State ZIP".split(/\s+/);
var [, state, zip] = parts;
var city = parts[0].slice(0, -1); // drop the comma that trails the city token

Here's one that accounts for the presence of spaces in either the city or state or both:

// Splitting on "whitespace OR comma" leaves an empty string where the comma
// is immediately followed by whitespace; that empty entry marks the boundary
// between the city tokens and the state tokens.
var parts = "san fran bay, new mex state 666666".split(/\s+|,/);

// The ZIP is the final token; removing it first leaves only city/state tokens.
var zip = parts.pop();

var partition = parts.indexOf("");
var city = parts.slice(0, partition).join(" ");
var state = parts.slice(partition + 1).join(" ");

This last one only works if you're lucky enough to be in an environment that supports destructuring assignment:

// Destructure the two halves around the comma, then peel the ZIP off the end
// of the second half; whatever tokens remain form the state.
var [city, statezip] = "Spaced City, New Mexico ZIP".split(/,\s*/),
    parts = statezip.split(/\s+/),
    zip = parts.pop(),
    state = parts.join(" ");

None of these perform any validation, of course.

Wayne
  • 56,476
  • 13
  • 125
  • 118
  • `San Francisco, CA 11111` or `New York City, NY 11111` seem to break your method. Try again. – jcolebrand Feb 23 '11 at 22:40
  • @drachenstern Because it splits on all spaces. – McKayla Feb 23 '11 at 22:46
  • What kind of city has a space in its name? Sheesh. Anyway, I at least fixed one of them. +1 to @tylermwashburn for attention to the little things -- like, you know, correctness -- but look at all that code! – Wayne Feb 23 '11 at 22:53
  • @drachenstern I was explaining why it breaks. It expects the city to just be one word, so when it has 2, it thinks the second word is the state, and the state is the zip code. – McKayla Feb 23 '11 at 22:54
  • @lwburk function parseAddress(a) {var r={},c=a.indexOf(',');r.city=a.slice(0,c);var f=a.substring(c+2),s=f.lastIndexOf(' ');r.state=f.slice(0,s);r.zip=f.substring(s+1);return r;} xD What now? – McKayla Feb 23 '11 at 23:02
  • function pA(a){var r={},sz,p;[r.c,sz]=a.split(/,\s*/);p=sz.split(/\s+/);r.z=p.pop();r.s=p.join(" ");return r;} – Wayne Feb 23 '11 at 23:18
1

Ok, since advising regex isn't good, here's my solution. It takes into account cities that have spaces in them, which the other responses here don't seem to do:

    // The text before the first comma is the city (it may contain spaces).
    var str = "New York, NY 20101";
    var cityAndRest = str.split(',');
    var city = cityAndRest[0].trim();
    // Tokenise the remainder on any run of whitespace, not just single spaces.
    var stateAndZip = cityAndRest[1].trim().split(/\s+/);
    // The ZIP is the last token; every token before it belongs to the state,
    // so a multi-word state such as "New Mexico" stays whole.
    var zip = stateAndZip[stateAndZip.length - 1];
    var state = stateAndZip.slice(0, -1).join(' ');
Daniel Ahrnsbrak
  • 1,057
  • 7
  • 15
0

First assumption: American addresses only.

First find out if the last 5 or the last 10 characters are numeric. A simpler test is to see if the last character is numeric. If so, it's probably got the zip code included. Then a simple test to see if the last 10 contains a space (city #####) or if the last ten include a dash (12345-6789) to figure out if it's a 5 or 5+4 zip. We'll test for a hyphen and no space. (city-du-lac 12345 captures -lac 12345)

Next, all addresses split the city and state by a comma, so we want the last comma. Find the index of the last comma, and split there. I don't know of a city that uses commas in its name, and I'm sure not gonna let my parser burst on an unknown if I can help it. I do ignore the fact that Washington DC could also be Washington, DC. I figure edge cases are for libraries, not one-off scripts.

Lastly, trim everything that remains to remove trailing or leading spaces.

/**
 * Report whether a value looks like a finite number.
 *
 * Two checks must both pass: `parseFloat` has to find a leading number, and
 * the whole value has to coerce to a finite number (which rejects trailing
 * junk such as "12px" as well as Infinity).
 *
 * @param {*} n - Value to test, typically a string or a number.
 * @returns {boolean} true when `n` is numeric and finite, false otherwise.
 */
function IsNumeric(n) {
  const leadingNumber = parseFloat(n);
  if (Number.isNaN(leadingNumber)) {
    return false;
  }
  return isFinite(n);
}

var addr = 'New York City, New York 10101';
//var addr = 'San Bernadino, CA 11111';

/**
 * Split a US-style "City, State ZIP" string into city, state and ZIP.
 *
 * The ZIP is optional: it is recognised only as a trailing 5-digit or
 * 5+4-digit ("12345-6789") group. What remains is split at the LAST comma,
 * so everything before it is the city and everything after it is the state.
 *
 * @param {string} addr - Address text, e.g. 'New York City, New York 10101'.
 * @returns {{city: string, state: string, zip: string}} The parsed parts.
 *     `zip` is '' when no trailing ZIP is found; `city` is '' when the input
 *     contains no comma (the remaining text is then returned as `state`).
 */
function getCityStateZip(addr) {
  const trimmed = addr.trim();

  // A ZIP must end the string and be preceded by whitespace, a comma, or the
  // start of the string, so the tail of a longer digit run is not mistaken
  // for one.
  const zipMatch = /(?:^|[\s,])(\d{5}(?:-\d{4})?)$/.exec(trimmed);
  const zip = zipMatch ? zipMatch[1] : '';

  // Remove only the ZIP itself. When there is no ZIP nothing is removed, so
  // the state keeps its last character.
  const cityState = trimmed.slice(0, trimmed.length - zip.length);

  const lastComma = cityState.lastIndexOf(',');
  if (lastComma === -1) {
    // No city/state separator: report everything that is left as the state.
    return { 'city': '', 'state': cityState.trim(), 'zip': zip };
  }

  // Trim both halves rather than assuming exactly one space after the comma.
  const city = cityState.slice(0, lastComma).trim();
  const state = cityState.slice(lastComma + 1).trim();
  return { 'city': city, 'state': state, 'zip': zip };
}

getCityStateZip(addr);

IsNumeric js function can be found here Validate decimal numbers in JavaScript - IsNumeric()

Community
  • 1
  • 1
jcolebrand
  • 15,923
  • 10
  • 71
  • 117
  • American and Canadian addresses, sorry, aoudad clarified b – Moshe Feb 24 '11 at 16:01
  • So I'm not familiar with Canadian postal addresses, there may exist some gotcha's that I don't know about. Are you sure there's not an address parsing library out there already? – jcolebrand Feb 24 '11 at 16:10
-1

For this type of thing you might want to use JavaScript's RegEx functions.

Here's some info:

http://www.javascriptkit.com/javatutors/re.shtml

Daniel Ahrnsbrak
  • 1,057
  • 7
  • 15