smithvoice.com
 Y'herd thisun? 

“Nothing is impossible on earth; one must only discover the means with which it can be carried out.”

from Ways of Spaceflight, 1928 (NASA F-622) by Hermann Oberth

Simple address formating

TaggedCoding, CSharp, VB

If you've ever bought address records or hired a company to do DE following a major direct mailing or just had to work a site that gets a lot of address posted you see how how little effort is usually spent on getting address strings into a standard format to help cut down on duplicates and make query results more useful. That's really too bad because it's not that hard to get the lion's share of the raw text into a consistent format.

The simple class below is a good one for your utility library. It takes a raw address string, removes extra spaces then converts direction and street labels to standard abbreviations.

That alone seems obvious and not worth our putting here for copy & pasting, the logic that makes it worth posting is the additional tedium of the look-aheads that account for double labels such as the common "Turnpike Road" that technically should be "Turnpike Rd" and not "Tpk Rd" and also directional situations such as in this example of a street that was named after a person named "West."

vb.net:

 

Public Class AddressFixer
  
#Region "Declarations"
Private m_Directions As CustomDictionary
Private m_Streets As CustomDictionary
  
#End Region
  
#Region "Public interface"
Public Function FixAddress(ByVal value As String) As String
InitDirections()
InitStreets()
  
Return DoTheFix(StrConv(value, VbStrConv.ProperCase))
  
''optionally hook into the apostrophe/acronym fixer from
'Dim a As New ApostonymFixer
'Try
'    Return a.FixText(DoTheFix(value), _
'        ApostonymFixer.rcsTypeOfCheck.rcsCheckApostroNym)
'Catch ex As Exception
'    Return value
'Finally
'    a = Nothing
'End Try
  
End Function
  
#End Region
  
#Region "Private methods"
Private Function PrepareString(ByVal value As String) As String
value = value.Trim
Do While value.IndexOf("  ") > 0
 value = value.Replace("  ", " ")
Loop
Return value
  
End Function
  
Private Function DoTheFix(ByVal value As String) As String
'now do the checking
Dim DE As DictionaryEntry
Dim sTemp As String = PrepareString(value)
  
Dim s() As String = sTemp.Split(" "c)
  
Dim iCounter As Integer = 0
  
For Each DE In m_Streets
 For iCounter = s.Length - 1 To 0 Step -1
 If s(iCounter).ToUpper.Equals(CType(DE.Key, String).ToUpper) Then
  
 If iCounter + 1 = s.Length Then
 s(iCounter) = DE.Value
 ElseIf iCounter + 1 < s.Length Then
 If Not m_Streets.Contains(s(iCounter + 1)) Then
 s(iCounter) = DE.Value
 End If
 End If
  
 ' if the element following this one is a
' street (long or short) then don't shorten this one
  
 End If
 Next
Next
  
For Each DE In m_Directions
 For iCounter = s.Length - 1 To 0 Step -1
 If s(iCounter).ToUpper.Equals(CType(DE.Key, String).ToUpper) Then
  
 If iCounter + 1 = s.Length Then
 s(iCounter) = DE.Value
 ElseIf iCounter + 1 < s.Length Then
 If Not m_Streets.Contains(s(iCounter + 1)) Then
 s(iCounter) = DE.Value
 End If
 End If
  
 ' if the element following this one is a street
' (long or short) then don't shorten this one
  
 End If
 Next
Next
  
Return Join(s, " ")
  
End Function
  
#End Region
  
#Region "Housekeeping"
Protected Overrides Sub Finalize()
MyBase.Finalize()
End Sub
  
#End Region
  
#Region "Initializers"
Private Sub InitDirections()
'hard coding used for example
'you could use a data structure
'such as a db, dataset/xml file
'which will let you add checks without recompiling
  
If m_Directions Is Nothing Then
 m_Directions = New CustomDictionary
  
 m_Directions.Add("North", "N")
 m_Directions.Add("NorthEast", "N E")
 m_Directions.Add("NorthWest", "N W")
 m_Directions.Add("South", "S")
 m_Directions.Add("SouthEast", "S E")
 m_Directions.Add("SouthWest", "S W")
 m_Directions.Add("East", "E")
 m_Directions.Add("West", "W")
  
End If
  
End Sub
  
Private Sub InitStreets()
'hard coding used for example
'you could use a data structure
'such as a db, dataset/xml file
'which will let you add checks without recompiling
  
If m_Streets Is Nothing Then
 m_Streets = New CustomDictionary
  
 m_Streets.Add("avenue", "Av")
 m_Streets.Add("ave", "Av")
 m_Streets.Add("boulevard", "Bvd")
 m_Streets.Add("circle", "Cir")
 m_Streets.Add("commons", "Cmns")
 m_Streets.Add("cmmns", "Cmns")
 m_Streets.Add("court", "Ct")
 m_Streets.Add("crt", "Ct")
 m_Streets.Add("drive", "Dr")
 m_Streets.Add("drv", "Dr")
 m_Streets.Add("extension", "Ext")
 m_Streets.Add("freeway", "Fwy")
 m_Streets.Add("heights", "Hts")
 m_Streets.Add("hgts", "Hts")
 m_Streets.Add("highway", "Hwy")
 m_Streets.Add("lane", "Ln")
 m_Streets.Add("parkway", "Pwy")
 m_Streets.Add("pkwy", "Pwy")
 m_Streets.Add("plaza", "Plz")
 m_Streets.Add("road", "Rd")
 m_Streets.Add("street", "St")
 m_Streets.Add("str", "St")
 m_Streets.Add("turnpike", "Tpk")
 m_Streets.Add("tnpk", "Tpk")
 m_Streets.Add("tpke", "Tpk")
  
End If
End Sub
#End Region
  
  
  
  
#Region "Dictionary"
'we use a dictionary simply to play off of its Contains method
'being private we can keep its protection code minimal
Private Class CustomDictionary
Inherits DictionaryBase
  
Friend Sub Add(ByVal fullValue As String, ByVal shortValue As String)
  
 MyBase.Dictionary.Add(fullValue, shortValue)
End Sub
  
  
Friend Sub Remove(ByVal fullValue As String)
 If MyBase.InnerHashtable.Contains(fullValue) Then
 MyBase.Dictionary.Remove(fullValue)
 End If
End Sub
  
Friend Function Contains(ByVal Value As String) As Boolean
 If MyBase.InnerHashtable.ContainsValue(Value) Or _
 MyBase.InnerHashtable.ContainsKey(Value) Then
 Return True
 Else
 Return False
 End If
End Function
  
Default Friend Property Item(ByVal fullValue As String) As String
 Get
 Return MyBase.Dictionary.Item(fullValue)
 End Get
 Set(ByVal Value As String)
 MyBase.Dictionary.Item(fullValue) = Value
 End Set
End Property
  
Protected Overrides Sub Finalize()
 MyBase.Finalize()
End Sub
End Class
#End Region
  
End Class

c#

 

using System;
using System.Collections;
using vb = Microsoft.VisualBasic;
  
namespace smithvoice
{
  
public class AddressFixer
{
#region "Declarations"
private CustomDictionary m_Directions;
private CustomDictionary m_Streets;
#endregion
  
#region "Public interface"
  
public string FixAddress(string value)
{
 InitDirections();
 InitStreets();
  
 return DoTheFix(vb.Strings.StrConv(value, vb.VbStrConv.ProperCase, 0));
  
 //'optionally hook into the apostrophe/acronym fixer from
  
}
#endregion
  
#region "Private methods"
private string PrepareString(string value)
{
 value = value.Trim();
 while (value.IndexOf("  ") > 0)
 {
 value = value.Replace("  ", " ");
 }
 return value;
  
}
  
private string DoTheFix(string value)
{
 //now do the checking
 //DictionaryEntry DE;
 string sTemp = PrepareString(value);
  
 string[] s = sTemp.Split(' ');
  
 int iCounter = 0;
  
 foreach (DictionaryEntry DE in m_Streets)
 {
 for (iCounter = s.Length - 1; iCounter >= 0; iCounter += -1)
 {
  
 if (s[iCounter].ToUpper().Equals(((string)DE.Key).ToUpper()))
 {
  
 if (iCounter + 1 == s.Length)
 {
 s[iCounter] = (string)DE.Value;
 }
 else if (iCounter + 1 < s.Length)
 {
 if (!m_Streets.Contains(s[iCounter + 1]))
 {
 s[iCounter] = (string)DE.Value;
 }
 }
  
 // if the element following this one is a
 // street (long or short) then don't shorten this one
  
 }
 }
 }
  
 for (iCounter = s.Length - 1; iCounter >= 0; iCounter += -1)
 {
 foreach (DictionaryEntry DE in m_Directions)
 {
 if (s[iCounter].ToUpper().Equals(((string)DE.Key).ToUpper()))
 {
  
 if (iCounter + 1 == s.Length)
 {
 s[iCounter] = (string)DE.Value;
 }
 else if (iCounter + 1 < s.Length)
 {
 if (!m_Streets.Contains(s[iCounter + 1]))
 {
 s[iCounter] = (string)DE.Value;
 }
 }
  
 // if the element following this one is a street
 // (long or short) then don't shorten this one
  
 }
 }
 }
  
 return String.Join(" ", s);
  
}
#endregion
  
 
#region "Initializers"
 
private void InitDirections()
{
 //hard coding used for example
 //you could use a data structure
 //such as a db, dataset/xml file
 //which will let you add checks without recompiling
  
 if (m_Directions == null)
 {
 m_Directions = new CustomDictionary();
  
 m_Directions.Add("North", "N");
 m_Directions.Add("NorthEast", "N E");
 m_Directions.Add("NorthWest", "N W");
 m_Directions.Add("South", "S");
 m_Directions.Add("SouthEast", "S E");
 m_Directions.Add("SouthWest", "S W");
 m_Directions.Add("East", "E");
 m_Directions.Add("West", "W");
  
 }
  
}
  
private void InitStreets()
{
 //hard coding used for example
 //you could use a data structure
 //such as a db, dataset/xml file
 //which will let you add checks without recompiling
  
 if (m_Streets == null)
 {
 m_Streets = new CustomDictionary();
  
 m_Streets.Add("avenue", "Av");
 m_Streets.Add("ave", "Av");
 m_Streets.Add("boulevard", "Bvd");
 m_Streets.Add("circle", "Cir");
 m_Streets.Add("commons", "Cmns");
 m_Streets.Add("cmmns", "Cmns");
 m_Streets.Add("court", "Ct");
 m_Streets.Add("crt", "Ct");
 m_Streets.Add("drive", "Dr");
 m_Streets.Add("drv", "Dr");
 m_Streets.Add("extension", "Ext");
 m_Streets.Add("freeway", "Fwy");
 m_Streets.Add("heights", "Hts");
 m_Streets.Add("hgts", "Hts");
 m_Streets.Add("highway", "Hwy");
 m_Streets.Add("lane", "Ln");
 m_Streets.Add("parkway", "Pwy");
 m_Streets.Add("pkwy", "Pwy");
 m_Streets.Add("plaza", "Plz");
 m_Streets.Add("road", "Rd");
 m_Streets.Add("street", "St");
 m_Streets.Add("str", "St");
 m_Streets.Add("turnpike", "Tpk");
 m_Streets.Add("tnpk", "Tpk");
 m_Streets.Add("tpke", "Tpk");
  
 //could be useful to also standardize floor, apartment type stuff
 m_Streets.Add("suite", "Ste");
 m_Streets.Add("floor", "Flr");
 m_Streets.Add("unit", "#");
  
 }
}
#endregion
#region "Dictionary"
//we use a dictionary simply to play off of its Contains method
//being private we can keep its protection code minimal
private class CustomDictionary : DictionaryBase
{
  
 internal void Add(string fullValue, string shortValue)
 {
  
 base.Dictionary.Add(fullValue, shortValue);
 }
  
  
 internal bool Contains(string Value)
 {
 Hashtable ht = base.InnerHashtable;
 if (ht.ContainsValue(Value) | ht.ContainsKey(Value))
 {
 return true;
 }
 else
 {
 return false;
 }
 }
  
}
#endregion
  
}
}

Using it is easy, just create an instance and call the public function:

Private Sub butDoIt_Click(ByVal sender As System.Object, _
ByVal e As System.EventArgs) Handles butDoIt.Click
  
Dim a As New AddressFixer
lblOutput.Text = a.FixAddress(txtInput.Text)
  
End Sub

 

Obviously, all that is going on here is in-place standardization of street and direction labels. While that standardization wil go a long way for many databases it's not the same as true Delivery Point Verification.

The next step would be to pull back on the moniker groupings then intelligently reposition the address elements to also follow a standard format, for example so that an original '123 main street northeast' would go from '123 Main St NE' to '123 NE Main St'.

Beyond that comes inclusion of the city, state and postal code and the checking of the whole against actual USPS or other country official files. There are services and products out there that can do all of this for you including the free-after-registration USPS Web Tools, the fee-based CDyne Address Corrector and Geocoder (test that data extensively before signing up, it has a lot of very cool options but to me the fundamental DPV appears to be a bit behind reality) and the nice in-house data options from QAS, but if you like the satisfactioon of figuring things out for yourself then you could also start with the simple code above and integrate it into checks against the raw data that is updated routinely and provided for free by the USPS on their CASS Files page.

Hope it helps.

Robert Smith
Kirkland WA



home     who is smith    contact smith     rss feed π
Since 1997 a place for my stuff, and it if helps you too then all the better smithvoice.com