UVa 760 – DNA Sequencing

0

Problem Link : http://uva.onlinejudge.org/index.php?option=com_onlinejudge&Itemid=8&category=24&page=show_problem&problem=701


Solution Idea:
1. Concatenate the two strings (with separator characters), then build the suffix array and the LCP array.
2. Find the positions in the LCP array where two consecutive suffixes come from
two different input strings.
3. Take the maximum of LCP[] over these positions.
4. For every such position whose LCP equals that maximum, generate the substring
that starts at that suffix and has the maximum length.
5. Print the distinct strings among these substrings.


#include <bits/stdc++.h>

#define pii              pair <int,int>
#define pll              pair <long long,long long>
#define sc               scanf
#define pf               printf
#define Pi               2*acos(0.0)
#define ms(a,b)          memset(a, b, sizeof(a))
#define pb(a)            push_back(a)
#define MP               make_pair
#define db               double
#define ll               long long
#define EPS              10E-10
#define ff               first
#define ss               second
#define sqr(x)           (x)*(x)
#define D(x)             cout<<#x " = "<<(x)<<endl
#define VI               vector <int>
#define DBG              pf("Hi\n")
#define MOD              1000000007
#define CIN              ios_base::sync_with_stdio(0); cin.tie(0); cout.tie(0)
#define SZ(a)            (int)a.size()
#define sf(a)            scanf("%d",&a)
#define sfl(a)           scanf("%lld",&a)
#define sff(a,b)         scanf("%d %d",&a,&b)
#define sffl(a,b)        scanf("%lld %lld",&a,&b)
#define sfff(a,b,c)      scanf("%d %d %d",&a,&b,&c)
#define sfffl(a,b,c)     scanf("%lld %lld %lld",&a,&b,&c)
#define stlloop(v)       for(__typeof(v.begin()) it=v.begin();it!=v.end();it++)
#define loop(i,n)        for(int i=0;i<n;i++)
#define loop1(i,n)       for(int i=1;i<=n;i++)
#define REP(i,a,b)       for(int i=a;i<b;i++)
#define RREP(i,a,b)      for(int i=a;i>=b;i--)
#define TEST_CASE(t)     for(int z=1;z<=t;z++)
#define PRINT_CASE       printf("Case %d: ",z)
#define LINE_PRINT_CASE  printf("Case %d:\n",z)
#define CASE_PRINT       cout<<"Case "<<z<<": "
#define all(a)           a.begin(),a.end()
#define intlim           2147483648
#define infinity         (1<<28)
#define ull              unsigned long long
#define gcd(a, b)        __gcd(a, b)
#define lcm(a, b)        ((a)*((b)/gcd(a,b)))

using namespace std;


/*----------------------Graph Moves----------------*/
//const int fx[]={+1,-1,+0,+0};
//const int fy[]={+0,+0,+1,-1};
//const int fx[]={+0,+0,+1,-1,-1,+1,-1,+1};   // Kings Move
//const int fy[]={-1,+1,+0,+0,+1,+1,-1,-1};  // Kings Move
//const int fx[]={-2, -2, -1, -1,  1,  1,  2,  2};  // Knights Move
//const int fy[]={-1,  1, -2,  2, -2,  2, -1,  1}; // Knights Move
/*------------------------------------------------*/

/*-----------------------Bitmask------------------*/
//int Set(int N,int pos){return N=N | (1<<pos);}
//int reset(int N,int pos){return N= N & ~(1<<pos);}
//bool check(int N,int pos){return (bool)(N & (1<<pos));}
/*------------------------------------------------*/


#define mx 1005

int SA[mx],tempSA[mx];
int RA[mx],tempRA[mx];
int c[mx];
int LCP[mx],PLCP[mx],Phi[mx];
char str[mx],str1[mx];
int n;

void counting_sort(int k)
{
    int maxi=max(300,n);
    ms(c,0);
    for(int i=0; i<n; i++)
    {
        int a=i+k<n?RA[i+k]:0;
        c[a]++;
    }
    for(int i=0,sum=0; i<maxi; i++)
    {
        int x=c[i];
        c[i]=sum;
        sum+=x;
    }

    for(int i=0; i<n; i++)
    {
        int a=SA[i]+k<n?RA[SA[i]+k]:0;
        int b=c[a];
        c[a]++;
        tempSA[b]=SA[i];
    }

    for(int i=0; i<n; i++)
        SA[i]=tempSA[i];
}

/*
 * Builds the suffix array of str[0..n) into SA.
 *
 * Starts from single-character ranks and repeatedly doubles the compared
 * length k: suffixes are ordered by the pair (RA[s], RA[s + k]) using two
 * counting_sort passes, then re-ranked into tempRA and copied back to RA.
 * Stops early once every suffix has a distinct rank.
 *
 * A second key that would fall at or beyond n is treated as 0, the same
 * convention counting_sort uses, so RA is never read outside [0, n).
 * The caller in this file terminates the text with a character that occurs
 * nowhere else; with such a terminator the guard never alters the result.
 */
void build_Suffix_Array()
{
    for(int i=0; i<n; i++)
    {
        RA[i]=str[i];
        SA[i]=i;
    }

    for(int k=1; k<n; k*=2)
    {
        // Sort by the second key first, then stably by the first key.
        counting_sort(k);
        counting_sort(0);

        int r=0;
        tempRA[SA[0]]=r;
        for(int i=1; i<n; i++)
        {
            const int cur=SA[i];
            const int prev=SA[i-1];
            // Guarded second keys: never index RA at or past n.
            const int curSecond = cur+k<n ? RA[cur+k] : 0;
            const int prevSecond = prev+k<n ? RA[prev+k] : 0;

            // A new rank starts whenever either key differs from the predecessor's.
            if(RA[cur]!=RA[prev] || curSecond!=prevSecond)
                ++r;
            tempRA[cur]=r;
        }
        for(int i=0; i<n; i++)
        {
            RA[i]=tempRA[i];
        }
        // All n ranks are distinct: the order is final.
        if(RA[SA[n-1]]==n-1) break;
    }
}

/*
 * Fills LCP[] from SA and str: LCP[r] is the length of the common prefix of
 * the suffixes SA[r] and SA[r-1]; LCP[0] is 0.
 *
 * Phi[s] holds the suffix that precedes suffix s in sorted order (-1 for the
 * smallest one) and PLCP[s] the matching length indexed by text position.
 * Walking the text left to right lets each step reuse the previous match
 * length minus one.
 */
void build_LCP()
{
    Phi[SA[0]] = -1;
    for (int rank = 1; rank < n; ++rank)
        Phi[SA[rank]] = SA[rank - 1];

    int matched = 0;
    for (int pos = 0; pos < n; ++pos)
    {
        const int prev = Phi[pos];
        if (prev != -1)
        {
            // Extend the match carried over from the previous text position.
            while (str[pos + matched] == str[prev + matched])
                ++matched;
            PLCP[pos] = matched;
            matched = max(matched - 1, 0);
        }
        else
        {
            // Smallest suffix: nothing precedes it; the carried length is left untouched.
            PLCP[pos] = 0;
        }
    }

    // Re-index from text position to suffix-array rank.
    for (int rank = 0; rank < n; ++rank)
        LCP[rank] = PLCP[SA[rank]];
}

map<string,bool>mp;

/* True when suffix starts a and b lie on opposite sides of the separator index sep. */
static bool fromDifferentInputs(int a,int b,int sep)
{
    return (a<sep && b>sep) || (a>sep && b<sep);
}

/*
 * Reads pairs of whitespace-separated strings until input ends.  For each
 * pair it builds "first$second#" in str, computes SA and LCP, and prints every
 * distinct longest common substring on its own line in suffix-array order,
 * or "No common sequence." when the longest common length is 0.
 * A blank line is printed between consecutive cases.
 */
int main()
{

//    freopen("in.txt","r",stdin);
//	  freopen("out.txt","w",stdout);

    bool check=0;   // set once the first case has been handled; drives the blank separator line
    while(scanf("%s",str)==1)
    {
        // A first string without its partner is not a case; stop rather than
        // pairing it with whatever str1 still holds.
        if(scanf("%s",str1)!=1) break;

        const int l1=(int)strlen(str);   // index of '$': starts below it are in the first string
        strcat(str,"$");
        strcat(str,str1);
        strcat(str,"#");
        n=strlen(str);
        build_Suffix_Array();
        build_LCP();
        string str2=str;

        if(check)
            printf("\n");
        check=1;

        // Longest common length: best LCP between neighbours from different inputs.
        int maxi=0;
        for(int i=1; i<n; i++)
        {
            if(fromDifferentInputs(SA[i],SA[i-1],l1))
                maxi=max(maxi,LCP[i]);
        }

        if(maxi==0)
        {
            printf("No common sequence.\n");
            continue;
        }

        mp.clear();

        for(int i=1; i<n; i++)
        {
            if(LCP[i]!=maxi || !fromDifferentInputs(SA[i],SA[i-1],l1))
                continue;

            string temp=str2.substr(SA[i],maxi);
            if(mp[temp]) continue;   // already printed this substring
            mp[temp]=1;
            printf("%s\n",temp.c_str());
        }
    }


    return 0;
}


UVa 10679 – I Love Strings!!

0

Problem Link : https://uva.onlinejudge.org/index.php?option=com_onlinejudge&Itemid=8&page=show_problem&problem=1620

Solution Idea: Build Suffix Array then binary search on the sorted sequence.


#include <bits/stdc++.h>

#define pii              pair <int,int>
#define pll              pair <long long,long long>
#define sc               scanf
#define pf               printf
#define Pi               2*acos(0.0)
#define ms(a,b)          memset(a, b, sizeof(a))
#define pb(a)            push_back(a)
#define MP               make_pair
#define db               double
#define ll               long long
#define EPS              10E-10
#define ff               first
#define ss               second
#define sqr(x)           (x)*(x)
#define D(x)             cout<<#x " = "<<(x)<<endl
#define VI               vector <int>
#define DBG              pf("Hi\n")
#define MOD              1000000007
#define CIN              ios_base::sync_with_stdio(0); cin.tie(0); cout.tie(0)
#define SZ(a)            (int)a.size()
#define sf(a)            scanf("%d",&a)
#define sfl(a)           scanf("%lld",&a)
#define sff(a,b)         scanf("%d %d",&a,&b)
#define sffl(a,b)        scanf("%lld %lld",&a,&b)
#define sfff(a,b,c)      scanf("%d %d %d",&a,&b,&c)
#define sfffl(a,b,c)     scanf("%lld %lld %lld",&a,&b,&c)
#define stlloop(v)       for(__typeof(v.begin()) it=v.begin();it!=v.end();it++)
#define loop(i,n)        for(int i=0;i<n;i++)
#define loop1(i,n)       for(int i=1;i<=n;i++)
#define REP(i,a,b)       for(int i=a;i<b;i++)
#define RREP(i,a,b)      for(int i=a;i>=b;i--)
#define TEST_CASE(t)     for(int z=1;z<=t;z++)
#define PRINT_CASE       printf("Case %d: ",z)
#define LINE_PRINT_CASE  printf("Case %d:\n",z)
#define CASE_PRINT       cout<<"Case "<<z<<": "
#define all(a)           a.begin(),a.end()
#define intlim           2147483648
#define infinity         (1<<28)
#define ull              unsigned long long
#define gcd(a, b)        __gcd(a, b)
#define lcm(a, b)        ((a)*((b)/gcd(a,b)))

using namespace std;


/*----------------------Graph Moves----------------*/
//const int fx[]={+1,-1,+0,+0};
//const int fy[]={+0,+0,+1,-1};
//const int fx[]={+0,+0,+1,-1,-1,+1,-1,+1};   // Kings Move
//const int fy[]={-1,+1,+0,+0,+1,+1,-1,-1};  // Kings Move
//const int fx[]={-2, -2, -1, -1,  1,  1,  2,  2};  // Knights Move
//const int fy[]={-1,  1, -2,  2, -2,  2, -1,  1}; // Knights Move
/*------------------------------------------------*/

/*-----------------------Bitmask------------------*/
//int Set(int N,int pos){return N=N | (1<<pos);}
//int reset(int N,int pos){return N= N & ~(1<<pos);}
//bool check(int N,int pos){return (bool)(N & (1<<pos));}
/*------------------------------------------------*/

#define mx 100005

int suffix_array[mx],position[mx];

// Sort record for one suffix during a doubling round of the suffix-array build.
struct data
{
    pii rank;   // (position[i], position[i+k]), with -1 as second part when i+k >= n
    int pos;    // start index i of the suffix in str
};

bool cmp(data a, data b)
{
    return a.rank<b.rank;
}

char str[mx],query[mx];

int main()
{

//    freopen("in.txt","r",stdin);
//	  freopen("out.txt","w",stdout);

    int t;
    sf(t);
    TEST_CASE(t)
    {
        scanf("%s",str);

        int n=strlen(str);

        for(int i=0;i<n;i++) position[i]=str[i];

        for(int k=1;k<n;k*=2)
        {
            vector<data>v(n);
            for(int i=0;i<n;i++)
            {
                int a=position[i];
                int b;
                if(i+k>=n)
                    b=-1;
                else
                    b=position[i+k];
                v[i].rank=pii(a,b);
                v[i].pos=i;
            }

            sort(all(v),cmp);

            position[v[0].pos]=0;
            suffix_array[0]=v[0].pos;
            for(int i=1;i<n;i++)
            {
                suffix_array[i]=v[i].pos;
                if(v[i].rank==v[i-1].rank)
                    position[v[i].pos]=position[v[i-1].pos];
                else
                    position[v[i].pos]=i;
            }
        }

        int q;
        sf(q);
        while(q--)
        {
            scanf("%s",query);
            int l=strlen(query);
            int lo=0,hi=n-1;
            bool ans=0;
            while(lo<=hi)
            {
                int mid=(lo+hi)/2;
                int x=strncmp(str+suffix_array[mid],query,l);
                if(x==0)
                {
                    ans=1;
                    break;
                }
                else if(x>0)
                    hi=mid-1;
                else
                    lo=mid+1;
            }
            if(ans)
                pf("y\n");
            else
                pf("n\n");
        }
    }

    return 0;
}


Light OJ 1258 – Making Huge Palindromes

2

/*
Problem Link : http://www.lightoj.com/volume_showproblem.php?problem=1258
*/

#include <bits/stdc++.h>

#define pii pair <int,int>
#define sc scanf
#define pf printf
#define Pi 2*acos(0.0)
#define ms(a,b) memset(a, b, sizeof(a))
#define pb(a) push_back(a)
#define MP make_pair
#define oo 1<<29
#define dd double
#define ll long long
#define EPS 10E-10
#define ff first
#define ss second
#define MAX 1000007
#define CIN ios_base::sync_with_stdio(0)
#define SZ(a) (int)a.size()
#define getint(a) scanf("%d",&a)
#define getint2(a,b) scanf("%d%d",&a,&b)
#define getint3(a,b,c) scanf("%d%d%d",&a,&b,&c)
#define loop(i,n) for(int i=0;i<n;i++)
#define TEST_CASE(t) for(int z=1;z<=t;z++)
#define PRINT_CASE printf("Case %d: ",z)
#define all(a) a.begin(),a.end()
#define intlim 2147483648
#define inf 1000000
#define rtintlim 46340
#define llim 9223372036854775808
#define rtllim 3037000499
#define ull unsigned long long
#define I int

using namespace std;

/* Bits operation */
/* Returns n with bit number pos switched on. */
int Set(int n,int pos)
{
    n |= (1 << pos);
    return n;
}
/* Tells whether bit number pos of n is on. */
bool check(int n,int pos)
{
    const int mask = 1 << pos;
    return (n & mask) != 0;
}
/* Returns n with bit number pos switched off. */
int Reset(int n, int pos)
{
    const int mask = 1 << pos;
    n &= ~mask;
    return n;
}
/*----------------*/

int lps[MAX];

/*
 * Fills the global lps[] for pattern P.
 * lps[i] is the last index of the longest proper prefix of P[0..i] that is
 * also a suffix of P[0..i], or -1 when no such prefix exists.
 */
void computePrefixFunction(string& P)
{
    const int len = (int)P.size();
    lps[0] = -1;
    for (int i = 1, border = -1; i < len; ++i)
    {
        // Drop to shorter borders until one can be extended by P[i].
        for (; border > -1 && P[border + 1] != P[i]; border = lps[border]) {}
        if (P[border + 1] == P[i])
            ++border;
        lps[i] = border;
    }
}

 /*
  * Scans T with the automaton of P and returns the number of characters of
  * P matched once the last character of T has been consumed, i.e. the
  * length of the longest prefix of P that ends exactly at the end of T.
  * Returns 0 for an empty T (the previous version returned 1 there).
  */
 int KMP(string& T, string& P)
 {
     const int n=SZ(T);
     computePrefixFunction(P);
     int k=-1;   // index in P of the last matched character, -1 when nothing is matched
     for(int i=0;i<n;i++)
     {
         // If all of P is matched, P[k+1] reads the string's terminating '\0'
         // and the loop falls back like any other mismatch.
         while(k>-1 && T[i]!=P[k+1])
            k=lps[k];
         if(T[i]==P[k+1])
            k++;
     }
     return k+1;
 }
int main()
{
    ///freopen("in.txt","r",stdin);
    ///freopen("out.txt","w",stdout);
    int t;
    getint(t);
    TEST_CASE(t)
    {
        string txt,pattarn;
        cin>>txt;
        pattarn=txt;
        reverse(all(pattarn));
        int n=KMP(txt,pattarn);
        int m=SZ(txt);
        PRINT_CASE;
        cout<<m+m-n<<endl;
    }
    return 0;
}

Light OJ 1255 – Substring Frequency

0

/*
Link : http://www.lightoj.com/volume_showproblem.php?problem=1255
*/

#include <bits/stdc++.h>

#define pii pair <int,int>
#define sc scanf
#define pf printf
#define Pi 2*acos(0.0)
#define ms(a,b) memset(a, b, sizeof(a))
#define pb(a) push_back(a)
#define MP make_pair
#define oo 1<<29
#define dd double
#define ll long long
#define EPS 10E-10
#define ff first
#define ss second
#define MAX 1000007
#define CIN ios_base::sync_with_stdio(0)
#define SZ(a) (int)a.size()
#define getint(a) scanf("%d",&a)
#define getint2(a,b) scanf("%d%d",&a,&b)
#define getint3(a,b,c) scanf("%d%d%d",&a,&b,&c)
#define loop(i,n) for(int i=0;i<n;i++)
#define TEST_CASE(t) for(int z=1;z<=t;z++)
#define PRINT_CASE printf("Case %d: ",z)
#define all(a) a.begin(),a.end()
#define intlim 2147483648
#define inf 1000000
#define rtintlim 46340
#define llim 9223372036854775808
#define rtllim 3037000499
#define ull unsigned long long
#define I int

using namespace std;

/* Bits operation */
/* Gives back n after raising the bit at index pos. */
int Set(int n,int pos)
{
    const int mask = 1 << pos;
    return n | mask;
}
/* Reports whether the bit at index pos is raised in n. */
bool check(int n,int pos)
{
    if (n & (1 << pos))
        return true;
    return false;
}
/* Gives back n after clearing the bit at index pos. */
int Reset(int n, int pos)
{
    return n & ~(1 << pos);
}
/*---------------*/

int lps[MAX];

/*
 * Computes the failure table of P into the global lps[].
 * lps[i] = index of the last character of the longest proper prefix of
 * P[0..i] that equals a suffix of P[0..i]; -1 if there is none.
 */
void computePrefixFunction(string& P)
{
    lps[0] = -1;
    const int total = (int)P.size();
    int matched = -1;
    int idx = 1;
    while (idx < total)
    {
        const char cur = P[idx];
        // Shrink the current border until cur can extend it or none is left.
        while (matched >= 0 && P[matched + 1] != cur)
            matched = lps[matched];
        if (P[matched + 1] == cur)
            matched += 1;
        lps[idx] = matched;
        ++idx;
    }
}

/*
 * Counts the occurrences of P in T, overlapping ones included.
 * After each full match the search continues from the longest border of P,
 * which is what allows overlaps.
 * An empty P yields 0; without that guard the match test k==m-1 would be
 * true before anything is matched and lps[-1] would be read.
 */
int KMP( string& T, string& P)
{
    const int n=SZ(T);
    const int m=SZ(P);
    if(m==0)
        return 0;

    computePrefixFunction(P);
    int k=-1;     // index in P of the last matched character
    int cnt=0;
    for(int i=0;i<n;i++)
    {
        while(k>-1 && T[i]!=P[k+1])
            k=lps[k];
        if(T[i]==P[k+1])
            k++;
        if(k==m-1)
        {
            cnt++;
            k=lps[k];   // keep the longest border so overlapping matches are found
        }
    }
    return cnt;
}

int main()
{
    ///freopen("in.txt","r",stdin);
    ///freopen("out.txt","w",stdout);
    int t;
    getint(t);
    TEST_CASE(t)
    {
        string A,B;
        cin>>A>>B;
        PRINT_CASE;
        cout<<KMP(A,B)<<endl;
    }
    return 0;
}

KMP (Knuth-Morris-Pratt)

0

/*
MMP""MM""YMM   db      `7MN.   `7MF'`7MMM.     ,MMF' .g8""8q.`YMM'   `MM'
P'   MM   `7  ;MM:       MMN.    M    MMMb    dPMM .dP'    `YM.VMA   ,V
     MM      ,V^MM.      M YMb   M    M YM   ,M MM dM'      `MM VMA ,V
     MM     ,M  `MM      M  `MN. M    M  Mb  M' MM MM        MM  VMMP
     MM     AbmmmqMA     M   `MM.M    M  YM.P'  MM MM.      ,MP   MM
     MM    A'     VML    M     YMM    M  `YM'   MM `Mb.    ,dP'   MM
   .JMML..AMA.   .AMMA..JML.    YM  .JML. `'  .JMML. `"bmmd"'   .JMML.
*/

#include <bits/stdc++.h>

#define pii pair <int,int>
#define sc scanf
#define pf printf
#define Pi 2*acos(0.0)
#define ms(a,b) memset(a, b, sizeof(a))
#define pb(a) push_back(a)
#define MP make_pair
#define oo 1<<29
#define dd double
#define ll long long
#define EPS 10E-10
#define ff first
#define ss second
#define MAX 1000007
#define CIN ios_base::sync_with_stdio(0)
#define SZ(a) (int)a.size()
#define getint(a) scanf("%d",&a)
#define getint2(a,b) scanf("%d%d",&a,&b)
#define getint3(a,b,c) scanf("%d%d%d",&a,&b,&c)
#define loop(i,n) for(int i=0;i<n;i++)
#define TEST_CASE(t) for(int z=1;z<=t;z++)
#define PRINT_CASE printf("Case %d: ",z)
#define all(a) a.begin(),a.end()
#define intlim 2147483648
#define inf 1000000
#define rtintlim 46340
#define llim 9223372036854775808
#define rtllim 3037000499
#define ull unsigned long long
#define I int

using namespace std;

/* Bits operation */
/* Turns on bit pos in a copy of n and returns it. */
int Set(int n,int pos)
{
    int result = n;
    result |= 1 << pos;
    return result;
}
/* True exactly when bit pos of n is 1. */
bool check(int n,int pos)
{
    return static_cast<bool>(n & (1 << pos));
}
/* Turns off bit pos in a copy of n and returns it. */
int Reset(int n, int pos)
{
    int result = n;
    result &= ~(1 << pos);
    return result;
}
/*----------------*/

 int lps[MAX];

 void computePrefixFunction(string P)
 {
     int n=SZ(P);
     int k=-1;
     lps[0]=-1;
     for(int i=1;i<n;i++)
     {
         while(k>-1 && P[i]!=P[k+1])
            k=lps[k];
         if(P[i]==P[k+1])
            k++;
         lps[i]=k;
     }
 }

 /*
  * Prints "Pattern found in position X" (X is the 0-based start index) for
  * every occurrence of P in T, overlapping ones included.
  * Nothing is printed for an empty P; without that guard the match test
  * k==m-1 would be true before anything is matched and lps[-1] would be read.
  */
 void KMP(string& T, string& P)
 {
     const int m=SZ(P);
     const int n=SZ(T);
     if(m==0)
        return;

     computePrefixFunction(P);
     int k=-1;   // index in P of the last matched character
     for(int i=0;i<n;i++)
     {
         while(k>-1 && T[i]!=P[k+1])
            k=lps[k];
         if(T[i]==P[k+1])
            k++;
         if(k==m-1)
         {
             // The match ends at i and spans k+1 characters, so it starts at i-k.
             cout<<"Pattern found in position "<<i-k<<endl;
             k=lps[k];   // continue from the longest border to catch overlaps
         }
     }
 }

int main()
{
    ///freopen("in.txt","r",stdin);
    ///freopen("out.txt","w",stdout);
    string text,pattarn;
    while(cin>>text>>pattarn)
    {
        KMP(text,pattarn);
    }
    return 0;
}